Repository: oozie Updated Branches: refs/heads/master fe2e9d2ef -> c0b5497b2
OOZIE-2467 Oozie can shutdown itself on long GC pause Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/c0b5497b Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/c0b5497b Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/c0b5497b Branch: refs/heads/master Commit: c0b5497b209be0f53f0b05336d883fc329b2a1d2 Parents: fe2e9d2 Author: Purshotam Shah <[email protected]> Authored: Thu May 26 16:01:22 2016 -0700 Committer: Purshotam Shah <[email protected]> Committed: Thu May 26 16:01:22 2016 -0700 ---------------------------------------------------------------------- .../event/listener/ZKConnectionListener.java | 6 ++--- .../oozie/service/ConfigurationService.java | 5 ++++ .../java/org/apache/oozie/util/ZKUtils.java | 27 ++++++++++++-------- core/src/main/resources/oozie-default.xml | 18 +++++++++++-- release-log.txt | 1 + 5 files changed, 40 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java b/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java index c6415b1..c27310e 100644 --- a/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java +++ b/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java @@ -24,7 +24,6 @@ import org.apache.curator.framework.state.ConnectionStateListener; import org.apache.oozie.service.ConfigurationService; import org.apache.oozie.service.Services; import org.apache.oozie.util.XLog; -import org.apache.oozie.util.ZKUtils; /** * ZKConnectionListener listens on ZK connection status. @@ -47,8 +46,7 @@ public class ZKConnectionListener implements ConnectionStateListener { // ZK connected // } if (newState == ConnectionState.SUSPENDED) { - LOG.warn("ZK connection is suspended, waiting for reconnect. If connection doesn't reconnect before " - + ZKUtils.getZKConnectionTimeout() + " (sec) Oozie server will shutdown itself"); + LOG.warn("ZK connection is suspended, waiting to reconnect."); } if (newState == ConnectionState.RECONNECTED) { @@ -57,7 +55,7 @@ public class ZKConnectionListener implements ConnectionStateListener { } if (newState == ConnectionState.LOST) { - LOG.fatal("ZK is not reconnected in " + ZKUtils.getZKConnectionTimeout()); + LOG.fatal("ZK is not reconnected"); if (ConfigurationService.getBoolean(CONF_SHUTDOWN_ON_TIMEOUT)) { LOG.fatal("Shutting down Oozie server"); Services.get().destroy(); http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/java/org/apache/oozie/service/ConfigurationService.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/service/ConfigurationService.java b/core/src/main/java/org/apache/oozie/service/ConfigurationService.java index 9d4dcd9..7f140d9 100644 --- a/core/src/main/java/org/apache/oozie/service/ConfigurationService.java +++ b/core/src/main/java/org/apache/oozie/service/ConfigurationService.java @@ -534,6 +534,11 @@ public class ConfigurationService implements Service, Instrumentable { return getInt(conf, name); } + public static int getInt(String name, int defaultValue) { + Configuration conf = Services.get().getConf(); + return conf.getInt(name, defaultValue); + } + public static int getInt(Configuration conf, String name) { return conf.getInt(name, ConfigUtils.INT_DEFAULT); } http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/java/org/apache/oozie/util/ZKUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/util/ZKUtils.java b/core/src/main/java/org/apache/oozie/util/ZKUtils.java index 91f8f70..8069641 100644 --- a/core/src/main/java/org/apache/oozie/util/ZKUtils.java +++ b/core/src/main/java/org/apache/oozie/util/ZKUtils.java @@ -94,11 +94,21 @@ public class ZKUtils { public static final String ZK_NAMESPACE = "oozie.zookeeper.namespace"; /** - *Default ZK connection timeout ( in sec). If connection is lost for more than timeout, then Oozie server will shutdown itself. + *Default ZK connection timeout ( in sec). */ public static final String ZK_CONNECTION_TIMEOUT = "oozie.zookeeper.connection.timeout"; /** + *Default ZK session timeout ( in sec). If connection is lost after retry, then Oozie server will shutdown itself. + */ + public static final String ZK_SESSION_TIMEOUT = "oozie.zookeeper.session.timeout"; + + /** + * Maximum number of times to retry. + */ + public static final String ZK_MAX_RETRIES = "oozie.zookeeper.max.retries"; + + /** * oozie-env environment variable for specifying the Oozie instance ID */ public static final String OOZIE_INSTANCE_ID = "oozie.instance.id"; @@ -124,7 +134,7 @@ public class ZKUtils { private XLog log; private static ZKUtils zk = null; - private static int zkConnectionTimeout; + /** * Private Constructor for the singleton; it connects to ZooKeeper and advertises this Oozie Server. @@ -179,7 +189,8 @@ public class ZKUtils { RetryPolicy retryPolicy = ZKUtils.getRetryPolicy(); String zkConnectionString = ConfigurationService.get(ZK_CONNECTION_STRING); String zkNamespace = getZKNameSpace(); - zkConnectionTimeout = ConfigurationService.getInt(ZK_CONNECTION_TIMEOUT); + int zkConnectionTimeout = ConfigurationService.getInt(ZK_CONNECTION_TIMEOUT); + int zkSessionTimeout = ConfigurationService.getInt(ZK_SESSION_TIMEOUT, 300); ACLProvider aclProvider; if (Services.get().getConf().getBoolean(ZK_SECURE, false)) { @@ -199,6 +210,7 @@ public class ZKUtils { .retryPolicy(retryPolicy) .aclProvider(aclProvider) .connectionTimeoutMs(zkConnectionTimeout * 1000) // in ms + .sessionTimeoutMs(zkSessionTimeout * 1000) //in ms .build(); client.start(); client.getConnectionStateListenable().addListener(new ZKConnectionListener()); @@ -409,7 +421,7 @@ public class ZKUtils { * @return RetryPolicy */ public static RetryPolicy getRetryPolicy() { - return new ExponentialBackoffRetry(1000, 3); + return new ExponentialBackoffRetry(1000, ConfigurationService.getInt(ZK_MAX_RETRIES, 10)); } /** @@ -419,11 +431,4 @@ public class ZKUtils { public static String getZKNameSpace() { return ConfigurationService.get(ZK_NAMESPACE); } - /** - * Return ZK connection timeout - * @return - */ - public static int getZKConnectionTimeout(){ - return zkConnectionTimeout; - } } http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/resources/oozie-default.xml ---------------------------------------------------------------------- diff --git a/core/src/main/resources/oozie-default.xml b/core/src/main/resources/oozie-default.xml index 942c847..2a2822b 100644 --- a/core/src/main/resources/oozie-default.xml +++ b/core/src/main/resources/oozie-default.xml @@ -2381,8 +2381,22 @@ will be the requeue interval for the actions which are waiting for a long time w <name>oozie.zookeeper.connection.timeout</name> <value>180</value> <description> - Default ZK connection timeout (in sec). If connection is lost for more than timeout, then Oozie server will shutdown - itself if oozie.zookeeper.server.shutdown.ontimeout is true. + Default ZK connection timeout (in sec). + </description> + </property> + <property> + <name>oozie.zookeeper.session.timeout</name> + <value>300</value> + <description> + Default ZK session timeout (in sec). If connection is lost even after retry, then Oozie server will shutdown + itself if oozie.zookeeper.server.shutdown.ontimeout is true. + </description> + </property> + <property> + <name>oozie.zookeeper.max.retries</name> + <value>10</value> + <description> + Maximum number of times to retry. </description> </property> http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 0d4282f..7f40a99 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.3.0 release (trunk - unreleased) +OOZIE-2467 Oozie can shutdown itself on long GC pause (puru) OOZIE-2537 SqoopMain does not set up log4j properly (pbacsko via rkanter) OOZIE-2532 patch apply does not handle binary files (gezapeti via rkanter) OOZIE-2330 Spark action should take the global jobTracker and nameNode configs by default and allow file and archive elements (satishsaley via rkanter)
