Author: stack
Date: Tue Aug 24 23:57:50 2010
New Revision: 988766
URL: http://svn.apache.org/viewvc?rev=988766&view=rev
Log:
Made TestAdmin pass. Working on TestZookeeper now. Added first cut at
a ShutdownServerHandler.
M BRANCH_TODO.txt
Added a question.
M src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
Make this test fail always rather than sometimes when it happens to
be carrying meta.
M src/test/java/org/apache/hadoop/hbase/master/TestServerManager.java
Renamed as TestDeadServer -- the functionality it's actually testing.
M src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
Rename. Made it reference new DeadServer class instead of
ServerManager.
M src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
Made it pass. Moved around some tests so it looks like lots of changes
when there weren't. Added assertions that we're enabled and disabled
to the test of 100 enable/disables.
M src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Minor improvement to NSRE message.
M src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
Added new splitLog method.
M src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
Moved all dead server stuff out to new DeadServer class.
M src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Added processing of dead server to AssignmentManager. Let it
process the RIT.
M src/main/java/org/apache/hadoop/hbase/master/HMaster.java
Pass MasterServices to CatalogManager instead of individual methods.
A src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
New class that encapsulates list of dead servers handling.
M
src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
First cut at a shutdown handler.
M src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
Added getting regions that were on a particular server.
M src/main/resources/hbase-webapps/regionserver/regionserver.jsp
Fix compile error.
Added:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
Removed:
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/master/TestServerManager.java
Modified:
hbase/branches/0.90_master_rewrite/BRANCH_TODO.txt
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/branches/0.90_master_rewrite/src/main/resources/hbase-webapps/regionserver/regionserver.jsp
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
Modified: hbase/branches/0.90_master_rewrite/BRANCH_TODO.txt
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/BRANCH_TODO.txt?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
--- hbase/branches/0.90_master_rewrite/BRANCH_TODO.txt (original)
+++ hbase/branches/0.90_master_rewrite/BRANCH_TODO.txt Tue Aug 24 23:57:50 2010
@@ -284,4 +284,6 @@ Later:
(if finish, need to use files not directory, and use right location)
- Put notes from reviewboard/jira into LB javadoc or hbase "book"
-
+ Questions:
+
+ If region in RIT, do I need to wait on log replay if region was in OPENING or
PENDING_OPEN state?
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
Tue Aug 24 23:57:50 2010
@@ -23,11 +23,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.NavigableSet;
import java.util.TreeMap;
+import java.util.TreeSet;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
@@ -279,4 +282,29 @@ public class MetaReader {
metaServer.close(scannerid);
}
}
+
+ public static NavigableSet<HRegionInfo>
+ getServerRegions(CatalogTracker catalogTracker, final HServerInfo hsi)
+ throws IOException {
+ HRegionInterface metaServer =
+ catalogTracker.waitForMetaServerConnectionDefault();
+ NavigableSet<HRegionInfo> hris = new TreeSet<HRegionInfo>();
+ Scan scan = new Scan();
+ scan.addFamily(HConstants.CATALOG_FAMILY);
+ long scannerid = metaServer.openScanner(
+ HRegionInfo.FIRST_META_REGIONINFO.getRegionName(), scan);
+ try {
+ Result data;
+ while((data = metaServer.next(scannerid)) != null) {
+ if (data != null && data.size() > 0) {
+ Pair<HRegionInfo, HServerAddress> pair = metaRowToRegionPair(data);
+ if (!pair.getSecond().equals(hsi.getServerAddress())) continue;
+ hris.add(pair.getFirst());
+ }
+ }
+ return hris;
+ } finally {
+ metaServer.close(scannerid);
+ }
+ }
}
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Tue Aug 24 23:57:50 2010
@@ -905,6 +905,36 @@ public class AssignmentManager extends Z
}
}
+ public void processServerShutdown(final HServerInfo hsi) {
+ synchronized (regionsInTransition) {
+ // Iterate all regions in transition checking if were on this server
+ final String serverName = hsi.getServerName();
+ for (Map.Entry<String, RegionState> e:
this.regionsInTransition.entrySet()) {
+ if (!e.getKey().equals(serverName)) continue;
+ RegionState regionState = e.getValue();
+ switch(regionState.getState()) {
+ case OFFLINE:
+ case CLOSED:
+ case PENDING_OPEN:
+ case OPENING:
+ // TODO: Do I need to replay logs for PENDING_OPEN and OPENING?
+ // Maybe the server took on edits?
+ case PENDING_CLOSE:
+ case CLOSING:
+ LOG.info("Region " +
regionState.getRegion().getRegionNameAsString() +
+ " was in state=" + regionState.getStamp() + " on shutdown
server=" +
+ serverName + ", reassigning");
+ assign(regionState.getRegion());
+ break;
+
+ case OPEN:
+ LOG.warn("Long-running region in OPEN state? Should not happen");
+ break;
+ }
+ }
+ }
+ }
+
public static class RegionState implements Writable {
private HRegionInfo region;
Added:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java?rev=988766&view=auto
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
(added)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
Tue Aug 24 23:57:50 2010
@@ -0,0 +1,133 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.hadoop.hbase.HServerInfo;
+
+/**
+ * Class to hold dead servers list and utility querying dead server list.
+ */
+public class DeadServer implements Set<String> {
+ /**
+ * Set of known dead servers. On znode expiration, servers are added here.
+ * This is needed in case of a network partitioning where the server's lease
+ * expires, but the server is still running. After the network is healed,
+ * and it's server logs are recovered, it will be told to call server startup
+ * because by then, its regions have probably been reassigned.
+ */
+ private final Set<String> deadServers = new HashSet<String>();
+
+
+ /**
+ * @param serverName
+ * @return true if server is dead
+ */
+ public boolean isDeadServer(final String serverName) {
+ return isDeadServer(serverName, false);
+ }
+
+ /**
+ * @param serverName Servername as either <code>host:port</code> or
+ * <code>host,port,startcode</code>.
+ * @param hostAndPortOnly True if <code>serverName</code> is host and
+ * port only (<code>host:port</code>) and if so, then we do a prefix compare
+ * (ignoring start codes) looking for dead server.
+ * @return true if server is dead
+ */
+ boolean isDeadServer(final String serverName, final boolean hostAndPortOnly)
{
+ return HServerInfo.isServer(this, serverName, hostAndPortOnly);
+ }
+
+ public synchronized Set<String> clone() {
+ Set<String> clone = new HashSet<String>(this.deadServers.size());
+ clone.addAll(this.deadServers);
+ return clone;
+ }
+
+ public synchronized int size() {
+ return deadServers.size();
+ }
+
+ public synchronized boolean isEmpty() {
+ return deadServers.isEmpty();
+ }
+
+ public synchronized boolean contains(Object o) {
+ return deadServers.contains(o);
+ }
+
+ public Iterator<String> iterator() {
+ return this.deadServers.iterator();
+ }
+
+ public synchronized Object[] toArray() {
+ return deadServers.toArray();
+ }
+
+ public synchronized <T> T[] toArray(T[] a) {
+ return deadServers.toArray(a);
+ }
+
+ public synchronized boolean add(String e) {
+ return deadServers.add(e);
+ }
+
+ public synchronized boolean remove(Object o) {
+ return deadServers.remove(o);
+ }
+
+ public synchronized boolean containsAll(Collection<?> c) {
+ return deadServers.containsAll(c);
+ }
+
+ public synchronized boolean addAll(Collection<? extends String> c) {
+ return deadServers.addAll(c);
+ }
+
+ public synchronized boolean retainAll(Collection<?> c) {
+ return deadServers.retainAll(c);
+ }
+
+ public synchronized boolean removeAll(Collection<?> c) {
+ return deadServers.removeAll(c);
+ }
+
+ public synchronized void clear() {
+ throw new NotImplementedException();
+ }
+
+ public synchronized boolean equals(Object o) {
+ return deadServers.equals(o);
+ }
+
+ public synchronized int hashCode() {
+ return deadServers.hashCode();
+ }
+
+ public synchronized String toString() {
+ return this.deadServers.toString();
+ }
+}
\ No newline at end of file
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
Tue Aug 24 23:57:50 2010
@@ -54,10 +54,10 @@ import org.apache.hadoop.hbase.catalog.M
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.MetaScanner;
+import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ServerConnection;
import org.apache.hadoop.hbase.client.ServerConnectionManager;
-import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
import org.apache.hadoop.hbase.ipc.HBaseRPC;
@@ -72,7 +72,6 @@ import org.apache.hadoop.hbase.master.ha
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
-import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
@@ -121,8 +120,6 @@ implements HMasterInterface, HMasterRegi
private final Configuration conf;
// server for the web ui
private InfoServer infoServer;
- // Reporting to track master metrics.
- private final MasterMetrics metrics;
// Our zk client.
private ZooKeeperWatcher zooKeeper;
@@ -168,7 +165,7 @@ implements HMasterInterface, HMasterRegi
* <li>Connect to ZooKeeper and figure out if this is a fresh cluster start
or
* a failed over master
* <li>Block until becoming active master
- * <li>Initialize master components - server manager, region manager,
metrics,
+ * <li>Initialize master components - server manager, region manager,
* region server queue, file system manager, etc
* </ol>
* @throws InterruptedException
@@ -224,13 +221,11 @@ implements HMasterInterface, HMasterRegi
* 4. We are active master now... go initialize components we need to run.
*/
// TODO: Do this using Dependency Injection, using PicoContainer or Spring.
- this.metrics = new MasterMetrics(this.getName());
this.fileSystemManager = new MasterFileSystem(this);
this.connection = ServerConnectionManager.getConnection(conf);
this.executorService = new ExecutorService(getServerName());
- this.serverManager = new ServerManager(this, this.connection, metrics,
- fileSystemManager, this.executorService);
+ this.serverManager = new ServerManager(this, this);
this.catalogTracker = new CatalogTracker(this.zooKeeper, this.connection,
this, conf.getInt("hbase.master.catalog.timeout", -1));
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
Tue Aug 24 23:57:50 2010
@@ -166,22 +166,25 @@ public class MasterFileSystem {
if(onlineServers.get(serverName) == null) {
LOG.info("Log folder doesn't belong " +
"to a known region server, splitting");
- this.splitLogLock.lock();
- Path logDir =
- new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
- try {
- HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
- } catch (IOException e) {
- LOG.error("Failed splitting " + logDir.toString(), e);
- } finally {
- this.splitLogLock.unlock();
- }
+ splitLog(serverName);
} else {
LOG.info("Log folder belongs to an existing region server");
}
}
}
+ public void splitLog(final String serverName) {
+ this.splitLogLock.lock();
+ Path logDir = new Path(this.rootdir,
HLog.getHLogDirectoryName(serverName));
+ try {
+ HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+ } catch (IOException e) {
+ LOG.error("Failed splitting " + logDir.toString(), e);
+ } finally {
+ this.splitLogLock.unlock();
+ }
+ }
+
/**
* Get the rootdir. Make sure its wholesome and exists before returning.
* @param rd
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
Tue Aug 24 23:57:50 2010
@@ -23,7 +23,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -44,7 +43,7 @@ import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.client.ServerConnection;
-import org.apache.hadoop.hbase.executor.ExecutorService;
+import org.apache.hadoop.hbase.client.ServerConnectionManager;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
@@ -82,19 +81,8 @@ public class ServerManager {
private final Map<String, HRegionInterface> serverConnections =
new HashMap<String, HRegionInterface>();
- /**
- * Set of known dead servers. On znode expiration, servers are added here.
- * This is needed in case of a network partitioning where the server's lease
- * expires, but the server is still running. After the network is healed,
- * and it's server logs are recovered, it will be told to call server startup
- * because by then, its regions have probably been reassigned.
- */
- private final Set<String> deadServers =
- Collections.synchronizedSet(new HashSet<String>());
-
- private Server master;
-
- private MasterMetrics masterMetrics;
+ private final Server master;
+ private final MasterServices services;
private final ServerMonitor serverMonitorThread;
@@ -102,9 +90,10 @@ public class ServerManager {
private final OldLogsCleaner oldLogCleaner;
- private final ServerConnection connection;
+ // Reporting to track master metrics.
+ private final MasterMetrics metrics;
- private final ExecutorService executorService;
+ private final DeadServer deadservers = new DeadServer();
/**
* Dumps into log current stats on dead servers and number of servers
@@ -118,58 +107,37 @@ public class ServerManager {
@Override
protected void chore() {
int numServers = numServers();
- int numDeadServers = deadServers.size();
+ int numDeadServers = deadservers.size();
double averageLoad = getAverageLoad();
- String deadServersList = null;
- if (numDeadServers > 0) {
- StringBuilder sb = new StringBuilder("Dead Server [");
- boolean first = true;
- synchronized (deadServers) {
- for (String server: deadServers) {
- if (!first) {
- sb.append(", ");
- first = false;
- }
- sb.append(server);
- }
- }
- sb.append("]");
- deadServersList = sb.toString();
- }
+ String deadServersList = deadservers.toString();
LOG.info(numServers + " region servers, " + numDeadServers +
" dead, average load " + averageLoad +
- (deadServersList != null? deadServers: ""));
+ ((deadServersList != null && deadServersList.length() > 0)?
+ deadServersList: ""));
}
}
/**
* Constructor.
* @param master
- * @param masterMetrics If null, we won't pass metrics.
- * @param masterFileSystem
- * @param service ExecutorService instance.
- */
- public ServerManager(Server master,
- final ServerConnection connection,
- MasterMetrics masterMetrics,
- MasterFileSystem masterFileSystem,
- ExecutorService service) {
+ * @param services
+ */
+ public ServerManager(final Server master, final MasterServices services) {
this.master = master;
- this.masterMetrics = masterMetrics;
- this.connection = connection;
- this.executorService = service;
+ this.services = services;
Configuration c = master.getConfiguration();
int metaRescanInterval =
c.getInt("hbase.master.meta.thread.rescanfrequency",
60 * 1000);
this.minimumServerCount = c.getInt("hbase.regions.server.count.min", 1);
+ this.metrics = new MasterMetrics(master.getServerName());
this.serverMonitorThread = new ServerMonitor(metaRescanInterval, master);
String n = Thread.currentThread().getName();
Threads.setDaemonThreadRunning(this.serverMonitorThread,
n + ".serverMonitor");
this.oldLogCleaner = new OldLogsCleaner(
c.getInt("hbase.master.meta.thread.rescanfrequency",60 * 1000),
- master, c, masterFileSystem.getFileSystem(),
- masterFileSystem.getOldLogDir());
+ master, c, this.services.getMasterFileSystem().getFileSystem(),
+ this.services.getMasterFileSystem().getOldLogDir());
Threads.setDaemonThreadRunning(oldLogCleaner,
n + ".oldLogCleaner");
}
@@ -190,7 +158,8 @@ public class ServerManager {
// for processing by ProcessServerShutdown.
HServerInfo info = new HServerInfo(serverInfo);
String hostAndPort = info.getServerAddress().toString();
- HServerInfo existingServer =
haveServerWithSameHostAndPortAlready(info.getHostnamePort());
+ HServerInfo existingServer =
+ haveServerWithSameHostAndPortAlready(info.getHostnamePort());
if (existingServer != null) {
String message = "Server start rejected; we already have " + hostAndPort
+
" registered; existingServer=" + existingServer + ", newServer=" +
info;
@@ -225,9 +194,7 @@ public class ServerManager {
*/
private void checkIsDead(final String serverName, final String what)
throws YouAreDeadException {
- if (!isDead(serverName)) {
- return;
- }
+ if (!this.deadservers.isDeadServer(serverName)) return;
String message = "Server " + what + " rejected; currently processing " +
serverName + " as dead server";
LOG.debug(message);
@@ -360,8 +327,8 @@ public class ServerManager {
// Refresh the info object and the load information
this.onlineServers.put(serverInfo.getServerName(), serverInfo);
HServerLoad load = serverInfo.getLoad();
- if(load != null && this.masterMetrics != null) {
- masterMetrics.incrementRequests(load.getNumberOfRequests());
+ if (load != null && this.metrics != null) {
+ this.metrics.incrementRequests(load.getNumberOfRequests());
}
// No more piggyback messages on heartbeats for other stuff
return msgs;
@@ -424,6 +391,10 @@ public class ServerManager {
}
}
+ public Set<String> getDeadServers() {
+ return this.deadservers.clone();
+ }
+
/**
* @param hsa
* @return The HServerInfo whose HServerAddress is <code>hsa</code> or null
@@ -481,7 +452,7 @@ public class ServerManager {
" but server is not currently online");
return;
}
- if (this.deadServers.contains(serverName)) {
+ if (this.deadservers.contains(serverName)) {
// TODO: Can this happen? It shouldn't be online in this case?
LOG.warn("Received expiration of " + hsi.getServerName() +
" but server shutdown is already in progress");
@@ -500,49 +471,12 @@ public class ServerManager {
}
return;
}
- // Add to dead servers and queue a shutdown processing.
- this.deadServers.add(serverName);
- this.executorService.submit(new ServerShutdownHandler(master));
+ this.services.getExecutorService().submit(new
ServerShutdownHandler(this.master,
+ this.services, deadservers, info));
LOG.debug("Added=" + serverName +
" to dead servers, submitted shutdown handler to be executed");
}
- /**
- * @param serverName
- */
- void removeDeadServer(String serverName) {
- this.deadServers.remove(serverName);
- }
-
- /**
- * @param serverName
- * @return true if server is dead
- */
- public boolean isDead(final String serverName) {
- return isDead(serverName, false);
- }
-
- /**
- * @param serverName Servername as either <code>host:port</code> or
- * <code>host,port,startcode</code>.
- * @param hostAndPortOnly True if <code>serverName</code> is host and
- * port only (<code>host:port</code>) and if so, then we do a prefix compare
- * (ignoring start codes) looking for dead server.
- * @return true if server is dead
- */
- boolean isDead(final String serverName, final boolean hostAndPortOnly) {
- return isDead(this.deadServers, serverName, hostAndPortOnly);
- }
-
- static boolean isDead(final Set<String> deadServers,
- final String serverName, final boolean hostAndPortOnly) {
- return HServerInfo.isServer(deadServers, serverName, hostAndPortOnly);
- }
-
- Set<String> getDeadServers() {
- return this.deadServers;
- }
-
public boolean canAssignUserRegions() {
if (minimumServerCount == 0) {
return true;
@@ -597,11 +531,12 @@ public class ServerManager {
private HRegionInterface getServerConnection(HServerInfo info) {
try {
+ ServerConnection connection =
+ ServerConnectionManager.getConnection(this.master.getConfiguration());
HRegionInterface hri = serverConnections.get(info.getServerName());
if(hri == null) {
LOG.info("new connection");
- hri = this.connection.getHRegionConnection(
- info.getServerAddress(), false);
+ hri = connection.getHRegionConnection(info.getServerAddress(), false);
serverConnections.put(info.getServerName(), hri);
}
return hri;
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
Tue Aug 24 23:57:50 2010
@@ -19,20 +19,108 @@
*/
package org.apache.hadoop.hbase.master.handler;
+import java.io.IOException;
+import java.util.NavigableSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.executor.EventHandler;
+import org.apache.hadoop.hbase.master.DeadServer;
+import org.apache.hadoop.hbase.master.MasterServices;
public class ServerShutdownHandler extends EventHandler {
+ private static final Log LOG =
LogFactory.getLog(ServerShutdownHandler.class);
+ private final HServerInfo hsi;
+ private final Server server;
+ private final MasterServices services;
+ private final DeadServer deadServers;
- public ServerShutdownHandler(Server server) {
+ public ServerShutdownHandler(final Server server, final MasterServices
services,
+ final DeadServer deadServers, final HServerInfo hsi) {
super(server, EventType.M_SERVER_SHUTDOWN);
+ this.hsi = hsi;
+ this.server = server;
+ this.services = services;
+ this.deadServers = deadServers;
+ // Add to dead servers.
+ this.deadServers.add(hsi.getServerName());
}
@Override
- public void process() {
- // TODO: implement this
+ public void process() throws IOException {
+ checkRootHost();
+ try {
+ this.server.getCatalogTracker().waitForRoot();
+ } catch (InterruptedException e) {
+ // Reinterrupt
+ Thread.currentThread().interrupt();
+ throw new IOException("Interrupted", e);
+ }
+ checkMetaHost();
+ try {
+ this.server.getCatalogTracker().waitForMeta();
+ } catch (InterruptedException e) {
+ // Reinterrupt
+ Thread.currentThread().interrupt();
+ throw new IOException("Interrupted", e);
+ }
+ final String serverName = this.hsi.getServerName();
+ // First reassign regions in transition.
+ LOG.info("Cleaning regions-in-transition of references to " + serverName);
+ this.services.getAssignmentManager().processServerShutdown(this.hsi);
+ LOG.info("Splitting logs for " + serverName);
+ this.services.getMasterFileSystem().splitLog(serverName);
- // DO FIXUP IF FIND OFFLINED PARENT BUT DAUGHTERS NOT ON LINE
+ NavigableSet<HRegionInfo> hris =
+ MetaReader.getServerRegions(this.server.getCatalogTracker(), this.hsi);
+ LOG.info("Reassigning the " + hris.size() + " region(s) that " +
serverName +
+ " was carrying.");
+
+ // We should encounter -ROOT- and .META. first in the Set given how its
+ // as sorted set.
+ for (HRegionInfo hri: hris) {
+ // If table is not disabled but the region is offlined,
+ boolean disabled = this.services.getAssignmentManager().
+ isTableDisabled(hri.getTableDesc().getNameAsString());
+ if (disabled) continue;
+ if (hri.isOffline()) {
+ LOG.warn("TODO: DO FIXUP ON OFFLINED PARENT? REGION OFFLINE -- IS THIS
RIGHT?" + hri);
+ continue;
+ }
+ this.services.getAssignmentManager().assign(hri);
+ }
+ this.deadServers.remove(serverName);
+ LOG.info("Finished processing of shutdown of " + serverName);
+ }
+
+ void checkRootHost() throws IOException {
+ HServerAddress rootHsa;
+ try {
+ rootHsa = this.server.getCatalogTracker().getRootLocation();
+ } catch (InterruptedException e) {
+ // Reinterrupt
+ Thread.currentThread().interrupt();
+ throw new IOException("Interrupted", e);
+ }
+ if (this.hsi.getServerAddress().equals(rootHsa)) {
+ LOG.warn("WAS CARRYING ROOT -- DO I HAVE TO DO ANYTHING? CAN I HURRY
NOTIFICATION THAT ROOT IS GONE?");
+ }
+ return;
+ }
+
+ void checkMetaHost() {
+ HServerAddress metaHsa;
+ // TODO: Presumes one meta region only.
+ metaHsa = this.server.getCatalogTracker().getMetaLocation();
+ if (this.hsi.getServerAddress().equals(metaHsa)) {
+ LOG.warn("WAS CARRYING META -- DO I HAVE TO DO ANYTHING? CAN I HURRY
NOTIFICATION THAT META IS GONE");
+ }
+ return;
}
}
\ No newline at end of file
Modified:
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Tue Aug 24 23:57:50 2010
@@ -1972,7 +1972,7 @@ public class HRegionServer implements HR
try {
region = getOnlineRegion(regionName);
if (region == null) {
- throw new NotServingRegionException(regionName);
+ throw new NotServingRegionException("Region is not online: " +
regionName);
}
return region;
} finally {
Modified:
hbase/branches/0.90_master_rewrite/src/main/resources/hbase-webapps/regionserver/regionserver.jsp
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/main/resources/hbase-webapps/regionserver/regionserver.jsp?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/main/resources/hbase-webapps/regionserver/regionserver.jsp
(original)
+++
hbase/branches/0.90_master_rewrite/src/main/resources/hbase-webapps/regionserver/regionserver.jsp
Tue Aug 24 23:57:50 2010
@@ -18,7 +18,7 @@
e.printStackTrace();
}
RegionServerMetrics metrics = regionServer.getMetrics();
- Collection<HRegionInfo> onlineRegions =
regionServer.getSortedOnlineRegionInfos();
+ Collection<HRegionInfo> onlineRegions = regionServer.getOnlineRegions();
int interval =
regionServer.getConfiguration().getInt("hbase.regionserver.msginterval",
3000)/1000;
%><?xml version="1.0" encoding="UTF-8" ?>
Modified:
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
Tue Aug 24 23:57:50 2010
@@ -113,7 +113,8 @@ public class TestZooKeeper {
public void testRegionServerSessionExpired() throws Exception {
LOG.info("Starting testRegionServerSessionExpired");
new HTable(conf, HConstants.META_TABLE_NAME);
- TEST_UTIL.expireRegionServerSession(0);
+ int metaIndex = TEST_UTIL.getMiniHBaseCluster().getServerWithMeta();
+ TEST_UTIL.expireRegionServerSession(metaIndex);
testSanity();
}
@@ -133,13 +134,12 @@ public class TestZooKeeper {
HBaseAdmin admin = new HBaseAdmin(conf);
String tableName = "test"+System.currentTimeMillis();
- HTableDescriptor desc =
- new HTableDescriptor(tableName);
+ HTableDescriptor desc = new HTableDescriptor(tableName);
HColumnDescriptor family = new HColumnDescriptor("fam");
desc.addFamily(family);
admin.createTable(desc);
- HTable table = new HTable(tableName);
+ HTable table = new HTable(conf, tableName);
Put put = new Put(Bytes.toBytes("testrow"));
put.add(Bytes.toBytes("fam"),
Bytes.toBytes("col"), Bytes.toBytes("testdata"));
Modified:
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java?rev=988766&r1=988765&r2=988766&view=diff
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
(original)
+++
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
Tue Aug 24 23:57:50 2010
@@ -26,10 +26,7 @@ import static org.junit.Assert.assertTru
import java.io.IOException;
import java.util.Iterator;
-import java.util.List;
import java.util.Map;
-import java.util.NavigableSet;
-import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -41,16 +38,15 @@ import org.apache.hadoop.hbase.HConstant
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
+import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
@@ -85,31 +81,6 @@ public class TestAdmin {
this.admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
}
- @Test public void testSplitCompactFlushClose() throws IOException {
- final byte [] tableName = Bytes.toBytes("testSplitCompactFlushClose");
- TEST_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
- HTable t = new HTable(TEST_UTIL.getConfiguration(), tableName);
- TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
- NavigableSet<HRegionInfo> hris = getClusterRegions();
- assertFalse(hris.isEmpty());
- this.admin.split(tableName);
- NavigableSet<HRegionInfo> splitHris = getClusterRegions();
- assertFalse(splitHris.isEmpty());
- int originalCount = hris.size();
- int postSplitCount = splitHris.size();
- assertTrue(postSplitCount > originalCount);
- }
-
- private NavigableSet<HRegionInfo> getClusterRegions() {
- MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
- List<RegionServerThread> rss = cluster.getRegionServerThreads();
- NavigableSet<HRegionInfo> hris = new TreeSet<HRegionInfo>();
- for (RegionServerThread rst: rss) {
- hris.addAll(rst.getRegionServer().getOnlineRegions());
- }
- return hris;
- }
-
@Test
public void testCreateTable() throws IOException {
HTableDescriptor [] tables = admin.listTables();
@@ -120,6 +91,21 @@ public class TestAdmin {
assertEquals(numTables + 1, tables.length);
}
+ @Test
+ public void testGetTableDescriptor() throws IOException {
+ HColumnDescriptor fam1 = new HColumnDescriptor("fam1");
+ HColumnDescriptor fam2 = new HColumnDescriptor("fam2");
+ HColumnDescriptor fam3 = new HColumnDescriptor("fam3");
+ HTableDescriptor htd = new HTableDescriptor("myTestTable");
+ htd.addFamily(fam1);
+ htd.addFamily(fam2);
+ htd.addFamily(fam3);
+ this.admin.createTable(htd);
+ HTable table = new HTable(TEST_UTIL.getConfiguration(), "myTestTable");
+ HTableDescriptor confirmedHtd = table.getTableDescriptor();
+ assertEquals(htd.compareTo(confirmedHtd), 0);
+ }
+
/**
* Verify schema modification takes.
* @throws IOException
@@ -441,15 +427,20 @@ public class TestAdmin {
Put put = new Put(row);
put.add(HConstants.CATALOG_FAMILY, qualifier, value);
ht.put(put);
+ Get get = new Get(row);
+ get.addColumn(HConstants.CATALOG_FAMILY, qualifier);
+ ht.get(get);
this.admin.disableTable(table);
// Test that table is disabled
- Get get = new Get(row);
+ get = new Get(row);
get.addColumn(HConstants.CATALOG_FAMILY, qualifier);
boolean ok = false;
try {
ht.get(get);
+ } catch (NotServingRegionException e) {
+ ok = true;
} catch (RetriesExhaustedException e) {
ok = true;
}
@@ -547,8 +538,8 @@ public class TestAdmin {
}
};
t.start();
- // tell the master to split the table
- admin.split(Bytes.toString(tableName));
+ // Split the table
+ this.admin.split(Bytes.toString(tableName));
t.join();
// Verify row count
@@ -757,26 +748,13 @@ public class TestAdmin {
for(int i = 0; i < times; i++) {
String tableName = "table"+i;
this.admin.disableTable(tableName);
+ byte [] tableNameBytes = Bytes.toBytes(tableName);
+ assertTrue(this.admin.isTableDisabled(tableNameBytes));
this.admin.enableTable(tableName);
+ assertFalse(this.admin.isTableDisabled(tableNameBytes));
this.admin.disableTable(tableName);
+ assertTrue(this.admin.isTableDisabled(tableNameBytes));
this.admin.deleteTable(tableName);
}
}
-
- @Test
- public void testGetTableDescriptor() throws IOException {
- HColumnDescriptor fam1 = new HColumnDescriptor("fam1");
- HColumnDescriptor fam2 = new HColumnDescriptor("fam2");
- HColumnDescriptor fam3 = new HColumnDescriptor("fam3");
- HTableDescriptor htd = new HTableDescriptor("myTestTable");
- htd.addFamily(fam1);
- htd.addFamily(fam2);
- htd.addFamily(fam3);
- this.admin.createTable(htd);
- HTable table = new HTable("myTestTable");
- HTableDescriptor confirmedHtd = table.getTableDescriptor();
-
- assertEquals(htd.compareTo(confirmedHtd), 0);
- }
}
-
Added:
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java?rev=988766&view=auto
==============================================================================
---
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
(added)
+++
hbase/branches/0.90_master_rewrite/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
Tue Aug 24 23:57:50 2010
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.*;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+
+
+public class TestDeadServer {
+ @Test public void testIsDead() {
+ Set<String> deadServers = new HashSet<String>();
+ DeadServer ds = new DeadServer();
+ final String hostname123 = "one,123,3";
+ assertFalse(ds.isDeadServer(hostname123, false));
+ assertFalse(ds.isDeadServer(hostname123, true));
+ deadServers.add(hostname123);
+ assertTrue(ds.isDeadServer(hostname123, false));
+ assertFalse(ds.isDeadServer("one:1", true));
+ assertFalse(ds.isDeadServer("one:1234", true));
+ assertTrue(ds.isDeadServer("one:123", true));
+ }
+}
\ No newline at end of file