Author: liyin Date: Wed Apr 2 20:49:11 2014 New Revision: 1584163 URL: http://svn.apache.org/r1584163 Log: [HBASE-10777] Restart Embedded Thrift Server in case of failures
Author: rshroff Summary: When the overall load on the embedded thrift server increases, the TThreadedThriftServer crashes. The RegionServer is still alive and serving regions, but clients talking to the embedded thrift server cannot reach those regions. This is a hack for now to restart the embedded thrift server in case of such failures. TThreadedThriftServer is outdated and we are planning to get rid of the proxy thrift server moving forward. Hence, I have added this hack. Test Plan: existing MR unit tests. Test it on a couple of RS on SH003 Reviewers: manukranthk Reviewed By: manukranthk CC: hbase-dev@, csliu, chaoyc, dcapra Differential Revision: https://phabricator.fb.com/D1222171 Task ID: 3797478 Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftMetrics.java hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftServerRunner.java Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftMetrics.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftMetrics.java?rev=1584163&r1=1584162&r2=1584163&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftMetrics.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftMetrics.java Wed Apr 2 20:49:11 2014 @@ -68,6 +68,8 @@ public class ThriftMetrics implements Up new MetricsTimeVaryingRate("thriftCall", registry); private MetricsTimeVaryingRate slowThriftCall = new MetricsTimeVaryingRate("slowThriftCall", registry); + private final MetricsIntValue numRestarted = + new MetricsIntValue("num_restarts", registry); /** * Number of calls that go through HTable. Only updated in the embedded Thrift server. 
This only @@ -142,6 +144,10 @@ public class ThriftMetrics implements Up this.directCalls.inc(); } + public void incNumRestarted() { + numRestarted.set(numRestarted.get() + 1); + } + public void incMethodTime(String name, long time) { MetricsTimeVaryingRate methodTimeMetric = getMethodTimeMetrics(name); if (methodTimeMetric == null) { Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftServerRunner.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftServerRunner.java?rev=1584163&r1=1584162&r2=1584163&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftServerRunner.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/thrift/ThriftServerRunner.java Wed Apr 2 20:49:11 2014 @@ -36,6 +36,7 @@ import java.util.concurrent.ExecutorServ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; @@ -114,6 +115,8 @@ public class ThriftServerRunner implemen volatile TServer tserver; + AtomicBoolean shutdownRequested = new AtomicBoolean(false); + private static ImplType DEFAULT_SERVER_TYPE = ImplType.THREADED_SELECTOR; private static String NOT_SUPPORTED_BY_PROXY_MSG = "Not supported by Thrift proxy"; @@ -254,16 +257,19 @@ public class ThriftServerRunner implemen */ @Override public void run() { - try { - setupServer(); - tserver.serve(); - } catch (Exception e) { - LOG.fatal("Cannot run ThriftServer", e); - throw new RuntimeException(e); + while (!shutdownRequested.get()) { + try { + setupServer(); + tserver.serve(); + } catch (Exception e) { + LOG.error("Cannot run ThriftServer", e); + metrics.incNumRestarted(); + } } } public void 
shutdown() { + shutdownRequested.set(true); if (tserver != null) { tserver.stop(); tserver = null;
