This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 754a028  Set Helix flapping window for all components (#4105)
754a028 is described below

commit 754a028f7bc22f0dfe68164ac300f31fdb946bc7
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Apr 15 16:35:29 2019 -0700

    Set Helix flapping window for all components (#4105)
    
    Helix will disconnect the manager and disable the instance if it
    detects flapping (too frequent disconnect from ZooKeeper). Setting
    flapping time window to a small value can avoid this from
    happening. Helix ignores the non-positive value, so set the default
    value as 1.
    
    Move the config into CommonConstants
    - For server, keep the old behavior
    - For broker, change default value from 0 (not supported) to 1
    - For controller and minion, add this system property
---
 .../broker/helix/DefaultHelixBrokerConfig.java     |  5 ---
 .../broker/broker/helix/HelixBrokerStarter.java    | 11 +++---
 .../apache/pinot/common/utils/CommonConstants.java | 27 ++++++++------
 .../apache/pinot/controller/ControllerStarter.java | 42 ++++++++++++++--------
 .../org/apache/pinot/minion/MinionStarter.java     | 11 ++++++
 .../helix/DefaultHelixStarterServerConfig.java     |  3 --
 .../server/starter/helix/HelixServerStarter.java   | 28 +++++++--------
 7 files changed, 74 insertions(+), 53 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
index c264598..e7ab072 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
@@ -24,8 +24,6 @@ import 
org.apache.commons.configuration.PropertiesConfiguration;
 
 
 public class DefaultHelixBrokerConfig {
-  public static final String HELIX_FLAPPING_TIME_WINDOW_NAME = 
"pinot.broker.helix.flappingTimeWindowMs";
-  public static final String DEFAULT_HELIX_FLAPPING_TIMEIWINDWOW_MS = "0";
 
   public static Configuration getDefaultBrokerConf() {
     Configuration brokerConf = new PropertiesConfiguration();
@@ -40,9 +38,6 @@ public class DefaultHelixBrokerConfig {
     //client properties
     brokerConf.addProperty("pinot.broker.client.queryPort", "8099");
 
-    // [PINOT-2435] setting to 0 so it doesn't disconnect from zk
-    brokerConf.addProperty("pinot.broker.helix.flappingTimeWindowMs", "0");
-
     return brokerConf;
   }
 
diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
index 337a6c3..081d891 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
@@ -34,6 +34,7 @@ import org.apache.helix.HelixDataAccessor;
 import org.apache.helix.HelixManager;
 import org.apache.helix.HelixManagerFactory;
 import org.apache.helix.InstanceType;
+import org.apache.helix.SystemPropertyKeys;
 import org.apache.helix.ZNRecord;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.Message;
@@ -185,10 +186,12 @@ public class HelixBrokerStarter {
   }
 
   private void setupHelixSystemProperties() {
-    final String helixFlappingTimeWindowPropName = 
"helixmanager.flappingTimeWindow";
-    System.setProperty(helixFlappingTimeWindowPropName, _pinotHelixProperties
-        .getString(DefaultHelixBrokerConfig.HELIX_FLAPPING_TIME_WINDOW_NAME,
-            DefaultHelixBrokerConfig.DEFAULT_HELIX_FLAPPING_TIMEIWINDWOW_MS));
+    // NOTE: Helix will disconnect the manager and disable the instance if it 
detects flapping (too frequent disconnect
+    // from ZooKeeper). Setting flapping time window to a small value can 
avoid this from happening. Helix ignores the
+    // non-positive value, so set the default value as 1.
+    System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, 
_pinotHelixProperties
+        
.getString(CommonConstants.Helix.CONFIG_OF_BROKER_FLAPPING_TIME_WINDOW_MS,
+            CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
   }
 
   private void addInstanceTagIfNeeded(String clusterName, String instanceName) 
{
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
index 137288c..62ed53f 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
@@ -98,13 +98,14 @@ public class CommonConstants {
     public static final int DEFAULT_BROKER_QUERY_PORT = 8099;
     public static final String KEY_OF_SERVER_NETTY_HOST = 
"pinot.server.netty.host";
 
-    public static final String HELIX_MANAGER_FLAPPING_TIME_WINDOW_KEY = 
"helixmanager.flappingTimeWindow";
-    public static final String HELIX_MANAGER_MAX_DISCONNECT_THRESHOLD_KEY = 
"helixmanager.maxDisconnectThreshold";
-    public static final String CONFIG_OF_HELIX_FLAPPING_TIMEWINDOW_MS = 
"pinot.server.flapping.timeWindowMs";
-    public static final String CONFIG_OF_HELIX_MAX_DISCONNECT_THRESHOLD =
-        "pinot.server.flapping.maxDisconnectThreshold";
-    public static final String DEFAULT_HELIX_FLAPPING_TIMEWINDOW_MS = "1";
-    public static final String DEFAULT_HELIX_FLAPPING_MAX_DISCONNECT_THRESHOLD 
= "100";
+    // NOTE: Helix will disconnect the manager and disable the instance if it 
detects flapping (too frequent disconnect
+    // from ZooKeeper). Setting flapping time window to a small value can 
avoid this from happening. Helix ignores the
+    // non-positive value, so set the default value as 1.
+    public static final String CONFIG_OF_CONTROLLER_FLAPPING_TIME_WINDOW_MS = 
"pinot.controller.flapping.timeWindowMs";
+    public static final String CONFIG_OF_BROKER_FLAPPING_TIME_WINDOW_MS = 
"pinot.broker.flapping.timeWindowMs";
+    public static final String CONFIG_OF_SERVER_FLAPPING_TIME_WINDOW_MS = 
"pinot.server.flapping.timeWindowMs";
+    public static final String CONFIG_OF_MINION_FLAPPING_TIME_WINDOW_MS = 
"pinot.minion.flapping.timeWindowMs";
+    public static final String DEFAULT_FLAPPING_TIME_WINDOW_MS = "1";
   }
 
   public static class Broker {
@@ -112,7 +113,8 @@ public class CommonConstants {
     public static final int DEFAULT_BROKER_QUERY_RESPONSE_LIMIT = 
Integer.MAX_VALUE;
     public static final String CONFIG_OF_BROKER_QUERY_LOG_LENGTH = 
"pinot.broker.query.log.length";
     public static final int DEFAULT_BROKER_QUERY_LOG_LENGTH = 
Integer.MAX_VALUE;
-    public static final String CONFIG_OF_BROKER_QUERY_LOG_MAX_RATE_PER_SECOND 
= "pinot.broker.query.log.maxRatePerSecond";
+    public static final String CONFIG_OF_BROKER_QUERY_LOG_MAX_RATE_PER_SECOND =
+        "pinot.broker.query.log.maxRatePerSecond";
     public static final double DEFAULT_BROKER_QUERY_LOG_MAX_RATE_PER_SECOND = 
10_000d;
     public static final String CONFIG_OF_BROKER_TIMEOUT_MS = 
"pinot.broker.timeoutMs";
     public static final long DEFAULT_BROKER_TIMEOUT_MS = 10_000L;
@@ -127,7 +129,8 @@ public class CommonConstants {
     // Configuration to consider the broker ServiceStatus as being STARTED if 
the percent of resources (tables) that
     // are ONLINE for this this broker has crossed the threshold percentage of 
the total number of tables
     // that it is expected to serve.
-    public static final String CONFIG_OF_BROKER_MIN_RESOURCE_PERCENT_FOR_START 
= "pinot.broker.startup.minResourcePercent";
+    public static final String CONFIG_OF_BROKER_MIN_RESOURCE_PERCENT_FOR_START 
=
+        "pinot.broker.startup.minResourcePercent";
     public static final double DEFAULT_BROKER_MIN_RESOURCE_PERCENT_FOR_START = 
100.0;
 
     public static class Request {
@@ -162,7 +165,8 @@ public class CommonConstants {
     public static final String CONFIG_OF_ENABLE_DEFAULT_COLUMNS = 
"pinot.server.instance.enable.default.columns";
     public static final String CONFIG_OF_ENABLE_SHUTDOWN_DELAY = 
"pinot.server.instance.enable.shutdown.delay";
     public static final String CONFIG_OF_ENABLE_SPLIT_COMMIT = 
"pinot.server.instance.enable.split.commit";
-    public static final String CONFIG_OF_ENABLE_COMMIT_END_WITH_METADATA = 
"pinot.server.instance.enable.commitend.metadata";
+    public static final String CONFIG_OF_ENABLE_COMMIT_END_WITH_METADATA =
+        "pinot.server.instance.enable.commitend.metadata";
     public static final String CONFIG_OF_REALTIME_OFFHEAP_ALLOCATION = 
"pinot.server.instance.realtime.alloc.offheap";
     public static final String CONFIG_OF_REALTIME_OFFHEAP_DIRECT_ALLOCATION =
         "pinot.server.instance.realtime.alloc.offheap.direct";
@@ -175,7 +179,8 @@ public class CommonConstants {
     // Configuration to consider the server ServiceStatus as being STARTED if 
the percent of resources (tables) that
     // are ONLINE for this this server has crossed the threshold percentage of 
the total number of tables
     // that it is expected to serve.
-    public static final String CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START 
= "pinot.server.startup.minResourcePercent";
+    public static final String CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START 
=
+        "pinot.server.startup.minResourcePercent";
     public static final double DEFAULT_SERVER_MIN_RESOURCE_PERCENT_FOR_START = 
100.0;
 
     public static final int DEFAULT_ADMIN_API_PORT = 8097;
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
index 10c1c00..9e8a699 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
@@ -36,6 +36,7 @@ import org.apache.commons.httpclient.HttpConnectionManager;
 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
 import org.apache.commons.io.FileUtils;
 import org.apache.helix.HelixManager;
+import org.apache.helix.SystemPropertyKeys;
 import org.apache.helix.task.TaskDriver;
 import org.apache.pinot.common.Utils;
 import org.apache.pinot.common.metrics.ControllerMeter;
@@ -112,6 +113,8 @@ public class ControllerStarter {
 
   public ControllerStarter(ControllerConf conf) {
     _config = conf;
+    setupHelixSystemProperties();
+
     _controllerMode = conf.getControllerMode();
     // Helix related settings.
     _helixZkURL = HelixConfig.getAbsoluteZkPathForHelix(_config.getZkStr());
@@ -137,6 +140,15 @@ public class ControllerStarter {
     }
   }
 
+  private void setupHelixSystemProperties() {
+    // NOTE: Helix will disconnect the manager and disable the instance if it 
detects flapping (too frequent disconnect
+    // from ZooKeeper). Setting flapping time window to a small value can 
avoid this from happening. Helix ignores the
+    // non-positive value, so set the default value as 1.
+    System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, _config
+        
.getString(CommonConstants.Helix.CONFIG_OF_CONTROLLER_FLAPPING_TIME_WINDOW_MS,
+            CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
+  }
+
   public PinotHelixResourceManager getHelixResourceManager() {
     return _helixResourceManager;
   }
@@ -191,7 +203,8 @@ public class ControllerStarter {
         LOGGER.error("Invalid mode: " + _controllerMode);
     }
 
-    ServiceStatus.setServiceStatusCallback(new 
ServiceStatus.MultipleCallbackServiceStatusCallback(_serviceStatusCallbackList));
+    ServiceStatus
+        .setServiceStatusCallback(new 
ServiceStatus.MultipleCallbackServiceStatusCallback(_serviceStatusCallbackList));
     _controllerMetrics.initializeGlobalMeters();
   }
 
@@ -241,7 +254,8 @@ public class ControllerStarter {
 
     // Helix resource manager must be started in order to create 
PinotLLCRealtimeSegmentManager
     LOGGER.info("Starting realtime segment manager");
-    PinotLLCRealtimeSegmentManager.create(_helixResourceManager, _config, 
_controllerMetrics, _controllerLeadershipManager);
+    PinotLLCRealtimeSegmentManager
+        .create(_helixResourceManager, _config, _controllerMetrics, 
_controllerLeadershipManager);
     _realtimeSegmentsManager = new 
PinotRealtimeSegmentManager(_helixResourceManager, 
_controllerLeadershipManager);
     _realtimeSegmentsManager.start(_controllerMetrics);
 
@@ -414,18 +428,18 @@ public class ControllerStarter {
   }
 
   public void stop() {
-      switch (_controllerMode) {
-        case DUAL:
-          stopPinotController();
-          stopHelixController();
-          break;
-        case PINOT_ONLY:
-          stopPinotController();
-          break;
-        case HELIX_ONLY:
-          stopHelixController();
-          break;
-      }
+    switch (_controllerMode) {
+      case DUAL:
+        stopPinotController();
+        stopHelixController();
+        break;
+      case PINOT_ONLY:
+        stopPinotController();
+        break;
+      case HELIX_ONLY:
+        stopHelixController();
+        break;
+    }
   }
 
   private void stopHelixController() {
diff --git 
a/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java 
b/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java
index 2d17eac..2139fac 100644
--- a/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java
+++ b/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java
@@ -28,6 +28,7 @@ import org.apache.commons.configuration.Configuration;
 import org.apache.helix.HelixAdmin;
 import org.apache.helix.HelixManager;
 import org.apache.helix.InstanceType;
+import org.apache.helix.SystemPropertyKeys;
 import org.apache.helix.manager.zk.ZKHelixManager;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.task.TaskStateModelFactory;
@@ -77,11 +78,21 @@ public class MinionStarter {
     _instanceId = 
config.getString(CommonConstants.Helix.Instance.INSTANCE_ID_KEY,
         CommonConstants.Minion.INSTANCE_PREFIX + NetUtil.getHostAddress() + "_"
             + CommonConstants.Minion.DEFAULT_HELIX_PORT);
+    setupHelixSystemProperties();
     _helixManager = new ZKHelixManager(_helixClusterName, _instanceId, 
InstanceType.PARTICIPANT, zkAddress);
     _taskExecutorFactoryRegistry = new TaskExecutorFactoryRegistry();
     _eventObserverFactoryRegistry = new EventObserverFactoryRegistry();
   }
 
+  private void setupHelixSystemProperties() {
+    // NOTE: Helix will disconnect the manager and disable the instance if it 
detects flapping (too frequent disconnect
+    // from ZooKeeper). Setting flapping time window to a small value can 
avoid this from happening. Helix ignores the
+    // non-positive value, so set the default value as 1.
+    System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, _config
+        
.getString(CommonConstants.Helix.CONFIG_OF_MINION_FLAPPING_TIME_WINDOW_MS,
+            CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
+  }
+
   /**
    * Registers a task executor factory.
    * <p>This is for pluggable task executor factories.
diff --git 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
index 46f29d5..aa50ca6 100644
--- 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
+++ 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
@@ -64,9 +64,6 @@ public class DefaultHelixStarterServerConfig {
     
serverConf.addProperty("pinot.server.query.executor.pruner.ValidSegmentPruner.id",
 "2");
     
serverConf.addProperty("pinot.server.query.executor.pruner.PartitionSegmentPruner.id",
 "3");
 
-    
serverConf.addProperty(CommonConstants.Helix.CONFIG_OF_HELIX_FLAPPING_TIMEWINDOW_MS,
-        CommonConstants.Helix.DEFAULT_HELIX_FLAPPING_TIMEWINDOW_MS);
-
     // request handler factory parameters
     
serverConf.addProperty(CommonConstants.Server.CONFIG_OF_REQUEST_HANDLER_FACTORY_CLASS,
         CommonConstants.Server.DEFAULT_REQUEST_HANDLER_FACTORY_CLASS);
diff --git 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
index fa491d9..64f7b32 100644
--- 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
+++ 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
@@ -38,6 +38,7 @@ import org.apache.helix.HelixManagerFactory;
 import org.apache.helix.InstanceType;
 import org.apache.helix.PropertyKey;
 import org.apache.helix.PropertyKey.Builder;
+import org.apache.helix.SystemPropertyKeys;
 import org.apache.helix.ZNRecord;
 import org.apache.helix.model.CurrentState;
 import org.apache.helix.model.ExternalView;
@@ -129,7 +130,7 @@ public class HelixServerStarter {
     }
 
     LOGGER.info("Connecting Helix components");
-    setupHelixSystemProperties(_helixServerConfig);
+    setupHelixSystemProperties();
     // Replace all white-spaces from list of zkServers.
     _zkServers = zkServer.replaceAll("\\s+", "");
     _helixManager =
@@ -178,8 +179,9 @@ public class HelixServerStarter {
         .addPreConnectCallback(() -> 
serverMetrics.addMeteredGlobalValue(ServerMeter.HELIX_ZOOKEEPER_RECONNECTS, 
1L));
 
     // Register the service status handler
-    final double minResourcePercentForStartup = 
_helixServerConfig.getDouble(CommonConstants.Server.CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START,
-        CommonConstants.Server.DEFAULT_SERVER_MIN_RESOURCE_PERCENT_FOR_START);
+    double minResourcePercentForStartup = _helixServerConfig
+        
.getDouble(CommonConstants.Server.CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START,
+            
CommonConstants.Server.DEFAULT_SERVER_MIN_RESOURCE_PERCENT_FOR_START);
     ServiceStatus.setServiceStatusCallback(new 
ServiceStatus.MultipleCallbackServiceStatusCallback(ImmutableList
         .of(new 
ServiceStatus.IdealStateAndCurrentStateMatchServiceStatusCallback(_helixManager,
 _helixClusterName,
                 _instanceId, minResourcePercentForStartup),
@@ -330,19 +332,13 @@ public class HelixServerStarter {
     }
   }
 
-  private void setupHelixSystemProperties(Configuration conf) {
-    // [PINOT-2435] [PINOT-3927] Disable helix detection of flapping connection
-    // Helix will shutdown and effectively remove the instance from cluster if
-    // it detects flapping while the process continues to run
-    // Helix ignores the value if it is <= 0. Hence, setting time window to 
small value
-    // and number of connection failures within that window to high value
-    
System.setProperty(CommonConstants.Helix.HELIX_MANAGER_FLAPPING_TIME_WINDOW_KEY,
-        
conf.getString(CommonConstants.Helix.CONFIG_OF_HELIX_FLAPPING_TIMEWINDOW_MS,
-            CommonConstants.Helix.DEFAULT_HELIX_FLAPPING_TIMEWINDOW_MS));
-
-    
System.setProperty(CommonConstants.Helix.HELIX_MANAGER_MAX_DISCONNECT_THRESHOLD_KEY,
-        
conf.getString(CommonConstants.Helix.CONFIG_OF_HELIX_MAX_DISCONNECT_THRESHOLD,
-            
CommonConstants.Helix.DEFAULT_HELIX_FLAPPING_MAX_DISCONNECT_THRESHOLD));
+  private void setupHelixSystemProperties() {
+    // NOTE: Helix will disconnect the manager and disable the instance if it 
detects flapping (too frequent disconnect
+    // from ZooKeeper). Setting flapping time window to a small value can 
avoid this from happening. Helix ignores the
+    // non-positive value, so set the default value as 1.
+    System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, 
_helixServerConfig
+        
.getString(CommonConstants.Helix.CONFIG_OF_SERVER_FLAPPING_TIME_WINDOW_MS,
+            CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
   }
 
   public void stop() {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to