Author: stack Date: Fri Oct 12 10:46:37 2007 New Revision: 584212 URL: http://svn.apache.org/viewvc?rev=584212&view=rev Log: HADOOP-2038 TestCleanRegionServerExit failed in patch build #927
Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java Removed: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestCleanRegionServerExit.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerAbort.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=584212&r1=584211&r2=584212&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Fri Oct 12 10:46:37 2007 @@ -75,6 +75,7 @@ HADOOP-2017 TestRegionServerAbort failure in patch build #903 and nightly #266 HADOOP-2029 TestLogRolling fails too often in patch and nightlies + HADOOP-2038 TestCleanRegionExit failed in patch build #927 IMPROVEMENTS HADOOP-1737 Make HColumnDescriptor data publically members settable Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=584212&r1=584211&r2=584212&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Fri Oct 12 10:46:37 2007 @@ -822,7 +822,8 @@ * Access to this map and loadToServers and serversToLoad must be synchronized * on this object */ - Map<String, HServerInfo> serversToServerInfo; + final Map<String, HServerInfo> serversToServerInfo = + new HashMap<String, HServerInfo>(); /** SortedMap server load 
-> Set of server names */ SortedMap<HServerLoad, Set<String>> loadToServers; @@ -871,27 +872,22 @@ LOG.info("bootstrap: creating ROOT and first META regions"); try { HRegion root = HRegion.createHRegion(HGlobals.rootRegionInfo, this.dir, - this.conf, null); - - HRegion meta = - HRegion.createHRegion(new HRegionInfo(1L, HGlobals.metaTableDesc, - null, null), this.dir, this.conf, null); + this.conf, null); + HRegion meta = HRegion.createHRegion(new HRegionInfo(1L, + HGlobals.metaTableDesc, null, null), this.dir, this.conf, null); // Add first region from the META table to the ROOT region. - HRegion.addRegionToMETA(root, meta); root.close(); root.getLog().closeAndDelete(); meta.close(); meta.getLog().closeAndDelete(); - } catch (IOException e) { e = RemoteExceptionHandler.checkIOException(e); LOG.error("bootstrap", e); throw e; } } - } catch (IOException e) { LOG.fatal("Not starting HMaster because:", e); throw e; @@ -905,7 +901,7 @@ this.shutdownQueue = new DelayQueue<PendingServerShutdown>(); this.msgQueue = new LinkedBlockingQueue<PendingOperation>(); - this.leaseTimeout = conf.getInt("hbase.master.lease.period", 30 * 1000); + this.leaseTimeout = conf.getInt("hbase.master.lease.period", 30 * 1000); this.serverLeases = new Leases(this.leaseTimeout, conf.getInt("hbase.master.lease.thread.wakefrequency", 15 * 1000)); @@ -914,7 +910,6 @@ false, conf); // The rpc-server port can be ephemeral... 
ensure we have the correct info - this.address = new HServerAddress(server.getListenerAddress()); conf.set(MASTER_ADDRESS, address.toString()); @@ -924,7 +919,6 @@ conf.getInt("hbase.master.meta.thread.rescanfrequency", 60 * 1000); // The root region - this.rootScanned = false; this.rootScannerThread = new RootScanner(); @@ -948,7 +942,6 @@ this.regionsToDelete = Collections.synchronizedSet(new HashSet<Text>()); - this.serversToServerInfo = new HashMap<String, HServerInfo>(); this.loadToServers = new TreeMap<HServerLoad, Set<String>>(); this.serversToLoad = new HashMap<String, HServerLoad>(); @@ -1042,7 +1035,7 @@ */ try { for (PendingOperation op = null; !closed.get(); ) { - op = shutdownQueue.poll(); + op = this.shutdownQueue.poll(); if (op == null ) { try { op = msgQueue.poll(threadWakeFrequency, TimeUnit.MILLISECONDS); @@ -1720,8 +1713,10 @@ } /* - * Assign all to the only server. An unlikely case but still possible. @param - * regionsToAssign @param serverName @param returnMsgs + * Assign all to the only server. An unlikely case but still possible. + * @param regionsToAssign + * @param serverName + * @param returnMsgs */ private void assignRegionsToOneServer(final TreeSet<Text> regionsToAssign, final String serverName, final ArrayList<HMsg> returnMsgs) { @@ -1768,8 +1763,8 @@ * serving, and the regions need to get reassigned. 
*/ private class PendingServerShutdown extends PendingOperation - implements Delayed { - private long delay; + implements Delayed { + private final long expire; private HServerAddress deadServer; private String deadServerName; private Path oldLogDir; @@ -1793,7 +1788,6 @@ PendingServerShutdown(HServerInfo serverInfo) { super(); - this.delay = leaseTimeout / 2; this.deadServer = serverInfo.getServerAddress(); this.deadServerName = this.deadServer.toString(); this.logSplit = false; @@ -1806,11 +1800,15 @@ dirName.append("_"); dirName.append(deadServer.getPort()); this.oldLogDir = new Path(dir, dirName.toString()); + // Set the future time at which we expect to be released from the + // DelayQueue we're inserted in on lease expiration. + this.expire = System.currentTimeMillis() + leaseTimeout / 2; } /** {@inheritDoc} */ public long getDelay(TimeUnit unit) { - return unit.convert(delay, TimeUnit.MILLISECONDS); + return unit.convert(this.expire - System.currentTimeMillis(), + TimeUnit.MILLISECONDS); } /** {@inheritDoc} */ Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java?rev=584212&view=auto ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java (added) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java Fri Oct 12 10:46:37 2007 @@ -0,0 +1,148 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; + +/** + * Tests region server failover when a region server exits both cleanly and + * when it aborts. + */ +public class TestRegionServerExit extends HBaseClusterTestCase { + final Log LOG = LogFactory.getLog(this.getClass().getName()); + HTable table; + + /** constructor */ + public TestRegionServerExit() { + super(2); + conf.setInt("ipc.client.timeout", 10000); // reduce client timeout + conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries + conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries + } + + /** + * Test abort of region server. + * @throws IOException + */ + public void testAbort() throws IOException { + // When the META table can be opened, the region servers are running + new HTable(conf, HConstants.META_TABLE_NAME); + // Create table and add a row. + final String tableName = getName(); + Text row = createTableAndAddRow(tableName); + // Start up a new region server to take over serving of root and meta + // after we shut down the current meta/root host. + this.cluster.startRegionServer(); + // Now abort the region server and wait for it to go down. 
+ this.cluster.abortRegionServer(0); + LOG.info(this.cluster.waitOnRegionServer(0) + " has been aborted"); + Thread t = startVerificationThread(tableName, row); + t.start(); + threadDumpingJoin(t); + } + + /** + * Test clean exit of region server. + * @throws IOException + */ + public void REMOVEtestCleanExit() throws IOException { + // When the META table can be opened, the region servers are running + new HTable(this.conf, HConstants.META_TABLE_NAME); + // Create table and add a row. + final String tableName = getName(); + Text row = createTableAndAddRow(tableName); + // Start up a new region server to take over serving of root and meta + // after we shut down the current meta/root host. + this.cluster.startRegionServer(); + // Now shutdown the region server and wait for it to go down. + this.cluster.stopRegionServer(0); + LOG.info(this.cluster.waitOnRegionServer(0) + " has been shutdown"); + Thread t = startVerificationThread(tableName, row); + t.start(); + threadDumpingJoin(t); + } + + private Text createTableAndAddRow(final String tableName) throws IOException { + HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY.toString())); + HBaseAdmin admin = new HBaseAdmin(conf); + admin.createTable(desc); + // put some values in the table + this.table = new HTable(conf, new Text(tableName)); + final Text row = new Text("row1"); + long lockid = table.startUpdate(row); + table.put(lockid, HConstants.COLUMN_FAMILY, + tableName.getBytes(HConstants.UTF8_ENCODING)); + table.commit(lockid); + return row; + } + + /* + * Run verification in a thread so I can concurrently run a thread-dumper + * while we're waiting (because in this test sometimes the meta scanner + * looks to be stuck). + * @param tableName Name of table to find. + * @param row Row we expect to find. + * @return Verification thread. Caller needs to call start on it.
+ */ + private Thread startVerificationThread(final String tableName, + final Text row) { + Runnable runnable = new Runnable() { + public void run() { + HScannerInterface scanner = null; + try { + // Verify that the client can find the data after the region has moved + // to a different server + scanner = + table.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, new Text()); + LOG.info("Obtained scanner " + scanner); + HStoreKey key = new HStoreKey(); + TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>(); + while (scanner.next(key, results)) { + assertTrue(key.getRow().equals(row)); + assertEquals(1, results.size()); + byte[] bytes = results.get(HConstants.COLUMN_FAMILY); + assertNotNull(bytes); + assertTrue(tableName.equals(new String(bytes, + HConstants.UTF8_ENCODING))); + } + LOG.info("Success!"); + } catch (IOException e) { + e.printStackTrace(); + } finally { + if (scanner != null) { + LOG.info("Closing scanner " + scanner); + try { + scanner.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + }; + return new Thread(runnable); + } +} \ No newline at end of file