Repository: hbase Updated Branches: refs/heads/branch-1.2 a6eae827f -> a2617b00c
HBASE-18141 Regionserver fails to shutdown when abort triggered during RPC call Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a2617b00 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a2617b00 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a2617b00 Branch: refs/heads/branch-1.2 Commit: a2617b00c0fd330d5b779c4aedc65e9dcc08cb5c Parents: a6eae82 Author: Gary Helmling <[email protected]> Authored: Thu Jun 1 15:47:04 2017 -0700 Committer: Gary Helmling <[email protected]> Committed: Thu Jun 8 18:42:37 2017 -0700 ---------------------------------------------------------------------- .../hbase/regionserver/HRegionServer.java | 50 ++++- .../regionserver/TestRegionServerAbort.java | 210 +++++++++++++++++++ 2 files changed, 250 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/a2617b00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index c6be3c3..129b5a7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -27,6 +27,7 @@ import java.lang.reflect.Constructor; import java.net.BindException; import java.net.InetAddress; import java.net.InetSocketAddress; +import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -142,6 +143,7 @@ import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL; import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.replication.regionserver.ReplicationLoad; import org.apache.hadoop.hbase.security.Superusers; +import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.security.UserProvider; import org.apache.hadoop.hbase.trace.SpanReceiverHost; import org.apache.hadoop.hbase.util.Addressing; @@ -1887,18 +1889,32 @@ public class HRegionServer extends HasThread implements @Override public void stop(final String msg) { + stop(msg, false); + } + + /** + * Stops the regionserver. + * @param msg Status message + * @param force True if this is a regionserver abort + */ + public void stop(final String msg, final boolean force) { if (!this.stopped) { - try { - if (this.rsHost != null) { + if (this.rsHost != null) { + // when forced via abort don't allow CPs to override + try { this.rsHost.preStop(msg); + } catch (IOException ioe) { + if (!force) { + LOG.warn("The region server did not stop", ioe); + return; + } + LOG.warn("Skipping coprocessor exception on preStop() due to forced shutdown", ioe); } - this.stopped = true; - LOG.info("STOPPED: " + msg); - // Wakes run() if it is sleeping - sleeper.skipSleepCycle(); - } catch (IOException exp) { - LOG.warn("The region server did not stop", exp); } + this.stopped = true; + LOG.info("STOPPED: " + msg); + // Wakes run() if it is sleeping + sleeper.skipSleepCycle(); } } @@ -2065,7 +2081,7 @@ public class HRegionServer extends HasThread implements * the exception that caused the abort, or null */ @Override - public void abort(String reason, Throwable cause) { + public void abort(final String reason, Throwable cause) { String msg = "ABORTING region server " + this + ": " + reason; if (cause != null) { LOG.fatal(msg, cause); @@ -2103,7 +2119,21 @@ public class HRegionServer extends HasThread implements } catch (Throwable t) { LOG.warn("Unable to report fatal error to master", t); } - stop(reason); + // shutdown should be run as the internal user + if (User.isHBaseSecurityEnabled(conf)) { + try { + User.runAsLoginUser(new PrivilegedExceptionAction<Object>() { + @Override + public Object run() throws Exception { + stop(reason, true); + return null; + } + }); + } catch (IOException neverThrown) { + } + } else { + stop(reason, true); + } } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/a2617b00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerAbort.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerAbort.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerAbort.java new file mode 100644 index 0000000..3d66c5f --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerAbort.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.coprocessor.BaseRegionServerObserver; +import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.wal.WAL; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.IOException; +import java.util.Collection; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Tests around regionserver shutdown and abort + */ +@Category({RegionServerTests.class, MediumTests.class}) +public class TestRegionServerAbort { + private static final byte[] FAMILY_BYTES = Bytes.toBytes("f"); + + private static final Log LOG = LogFactory.getLog(TestRegionServerAbort.class); + + private HBaseTestingUtility testUtil; + private Configuration conf; + private MiniDFSCluster dfsCluster; + private MiniHBaseCluster cluster; + + @Before + public void setup() throws Exception { + testUtil = new HBaseTestingUtility(); + conf = testUtil.getConfiguration(); + conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, + StopBlockingRegionObserver.class.getName()); + // make sure we have multiple blocks so that the client does not prefetch all block locations + conf.set("dfs.blocksize", Long.toString(100 * 1024)); + // prefetch the first block + conf.set(DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY, Long.toString(100 * 1024)); + conf.set(HConstants.REGION_IMPL, ErrorThrowingHRegion.class.getName()); + + testUtil.startMiniZKCluster(); + dfsCluster = testUtil.startMiniDFSCluster(2); + cluster = testUtil.startMiniHBaseCluster(1, 2); + } + + @After + public void tearDown() throws Exception { + for (JVMClusterUtil.RegionServerThread t : cluster.getRegionServerThreads()) { + HRegionServer rs = t.getRegionServer(); + RegionServerCoprocessorHost cpHost = rs.getRegionServerCoprocessorHost(); + StopBlockingRegionObserver cp = (StopBlockingRegionObserver) + cpHost.findCoprocessor(StopBlockingRegionObserver.class.getName()); + cp.setStopAllowed(true); + } + testUtil.shutdownMiniCluster(); + } + + /** + * Test that a regionserver is able to abort properly, even when a coprocessor + * throws an exception in preStopRegionServer(). + */ + @Test + public void testAbortFromRPC() throws Exception { + TableName tableName = TableName.valueOf("testAbortFromRPC"); + // create a test table + HTable table = testUtil.createTable(tableName, FAMILY_BYTES); + + // write some edits + testUtil.loadTable(table, FAMILY_BYTES); + LOG.info("Wrote data"); + // force a flush + cluster.flushcache(tableName); + LOG.info("Flushed table"); + + // delete a store file from the table region + HRegion firstRegion = cluster.findRegionsForTable(tableName).get(0); + + // aborting from region + HRegionFileSystem regionFS = firstRegion.getRegionFileSystem(); + Collection<StoreFileInfo> storeFileInfos = regionFS.getStoreFiles(FAMILY_BYTES); + assertFalse(storeFileInfos.isEmpty()); + StoreFileInfo firstStoreFile = storeFileInfos.iterator().next(); + + // move the store file away + // we will still be able to read the first block, since the location was pre-fetched on open + // but attempts to read subsequent blocks will fail + LOG.info("Moving store file " + firstStoreFile.getPath()); + FileSystem fs = regionFS.getFileSystem(); + Path tmpdir = new Path("/tmp"); + fs.mkdirs(tmpdir); + assertTrue(fs.rename(firstStoreFile.getPath(), + new Path(tmpdir, firstStoreFile.getPath().getName()))); + + // start a scan, this should trigger a regionserver abort + ResultScanner scanner = table.getScanner(new Scan()); + int count = 0; + for (Result f : scanner) { + count++; + } + LOG.info("Finished scan with " + count + " results"); + // should have triggered an abort due to FileNotFoundException + + // verify that the regionserver is stopped + assertTrue(firstRegion.getRegionServerServices().isAborted()); + assertTrue(firstRegion.getRegionServerServices().isStopped()); + } + + /** + * Test that a coprocessor is able to override a normal regionserver stop request. + */ + @Test + public void testStopOverrideFromCoprocessor() throws Exception { + Admin admin = testUtil.getHBaseAdmin(); + HRegionServer regionserver = cluster.getRegionServer(0); + admin.stopRegionServer(regionserver.getServerName().getHostAndPort()); + + // regionserver should have failed to stop due to coprocessor + assertFalse(cluster.getRegionServer(0).isAborted()); + assertFalse(cluster.getRegionServer(0).isStopped()); + } + + public static class StopBlockingRegionObserver extends BaseRegionServerObserver { + private boolean stopAllowed; + + @Override + public void preStopRegionServer(ObserverContext<RegionServerCoprocessorEnvironment> env) + throws IOException { + if (!stopAllowed) { + throw new IOException("Stop not allowed"); + } + } + + public void setStopAllowed(boolean allowed) { + this.stopAllowed = allowed; + } + + public boolean isStopAllowed() { + return stopAllowed; + } + } + + /** + * Throws an exception during store file refresh in order to trigger a regionserver abort. + */ + public static class ErrorThrowingHRegion extends HRegion { + public ErrorThrowingHRegion(Path tableDir, WAL wal, FileSystem fs, Configuration confParam, + HRegionInfo regionInfo, HTableDescriptor htd, + RegionServerServices rsServices) { + super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices); + } + + public ErrorThrowingHRegion(HRegionFileSystem fs, WAL wal, Configuration confParam, + HTableDescriptor htd, RegionServerServices rsServices) { + super(fs, wal, confParam, htd, rsServices); + } + + @Override + protected boolean refreshStoreFiles(boolean force) throws IOException { + // forced when called through RegionScannerImpl.handleFileNotFound() + if (force) { + throw new IOException("Failing file refresh for testing"); + } + return super.refreshStoreFiles(force); + } + } +}
