Author: stack
Date: Mon Sep 20 19:04:41 2010
New Revision: 999057
URL: http://svn.apache.org/viewvc?rev=999057&view=rev
Log:
HBASE-3017 More log pruning
M conf/log4j.properties
Make ZooKeeperWatcher (ZKW) log at INFO-level
M src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Log message cleanup.
M src/main/java/org/apache/hadoop/hbase/master/HMaster.java
Remove excessive hostname+port qualifier on master for zk messages
M src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
Log message cleanup
M src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
Format the ServerMonitor message.
M src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Remove excessive hostname on zk message id; just add port
M src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
Cleanup of messages.
Modified:
hbase/trunk/CHANGES.txt
hbase/trunk/conf/log4j.properties
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
Modified: hbase/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Mon Sep 20 19:04:41 2010
@@ -919,6 +919,7 @@ Release 0.21.0 - Unreleased
HBASE-2988 Support alternate compression for major compactions
HBASE-2941 port HADOOP-6713 - threading scalability for RPC reads - to
HBase
HBASE-2782 QOS for META table access
+ HBASE-3017 More log pruning
NEW FEATURES
HBASE-1961 HBase EC2 scripts
Modified: hbase/trunk/conf/log4j.properties
URL:
http://svn.apache.org/viewvc/hbase/trunk/conf/log4j.properties?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/conf/log4j.properties (original)
+++ hbase/trunk/conf/log4j.properties Mon Sep 20 19:04:41 2010
@@ -43,6 +43,8 @@ log4j.appender.console.layout.Conversion
log4j.logger.org.apache.zookeeper=INFO
#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
log4j.logger.org.apache.hadoop.hbase=DEBUG
+# Make these two classes INFO-level. Make them DEBUG to see more zk debug.
+log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO
+log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO
#log4j.logger.org.apache.hadoop.dfs=DEBUG
# Set this class to log INFO only otherwise its OTT
-log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Mon Sep 20 19:04:41 2010
@@ -253,7 +253,7 @@ public class AssignmentManager extends Z
RegionState regionState = regionsInTransition.get(encodedName);
switch(data.getEventType()) {
case M2ZK_REGION_OFFLINE:
- LOG.warn("What to do with this event? " + data);
+ // Nothing to do.
break;
case RS2ZK_REGION_CLOSING:
@@ -545,8 +545,8 @@ public class AssignmentManager extends Z
if (plan == null) {
LOG.debug("No previous transition plan for " +
state.getRegion().getRegionNameAsString() +
- " so generating a random one from " + serverManager.countOfRegionServers() +
- " ( " + serverManager.getOnlineServers().size() + ") available servers");
+ " so generating a random one; " + serverManager.countOfRegionServers() +
+ " (online=" + serverManager.getOnlineServers().size() + ") available servers");
plan = new RegionPlan(state.getRegion(), null,
LoadBalancer.randomAssignment(serverManager.getOnlineServersList()));
regionPlans.put(encodedName, plan);
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Mon Sep 20 19:04:41 2010
@@ -203,8 +203,7 @@ implements HMasterInterface, HMasterRegi
"_" + System.currentTimeMillis());
}
- this.zooKeeper =
- new ZooKeeperWatcher(conf, MASTER + "-" + getMasterAddress(), this);
+ this.zooKeeper = new ZooKeeperWatcher(conf, MASTER, this);
/*
* 2. Block on becoming the active master.
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Mon Sep 20 19:04:41 2010
@@ -165,13 +165,13 @@ public class MasterFileSystem {
}
for (FileStatus status : logFolders) {
String serverName = status.getPath().getName();
- LOG.info("Found log folder : " + serverName);
- if(onlineServers.get(serverName) == null) {
- LOG.info("Log folder doesn't belong " +
+ if (onlineServers.get(serverName) == null) {
+ LOG.info("Log folder " + status.getPath() + " doesn't belong " +
"to a known region server, splitting");
splitLog(serverName);
} else {
- LOG.info("Log folder belongs to an existing region server");
+ LOG.info("Log folder " + status.getPath() +
+ " belongs to an existing region server");
}
}
}
@@ -279,4 +279,4 @@ public class MasterFileSystem {
new Path(rootdir, region.getTableDesc().getNameAsString()),
region.getEncodedName(), familyName), true);
}
-}
\ No newline at end of file
+}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Mon Sep 20 19:04:41 2010
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import java.text.DecimalFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -93,6 +94,8 @@ public class ServerManager {
private final DeadServer deadservers = new DeadServer();
+ private static final DecimalFormat DF = new DecimalFormat("#.##");
+
/**
* Dumps into log current stats on dead servers and number of servers
* TODO: Make this a metric; dump metrics into log.
@@ -108,10 +111,9 @@ public class ServerManager {
int numDeadServers = deadservers.size();
double averageLoad = getAverageLoad();
String deadServersList = deadservers.toString();
- LOG.info(numServers + " region servers, " + numDeadServers +
- " dead, average load " + averageLoad +
- ((deadServersList != null && deadServersList.length() > 0)?
- deadServersList: ""));
+ LOG.info("regionservers=" + numServers +
+ ", averageload=" + DF.format(averageLoad) +
+ ((numDeadServers > 0)? ("deadservers=" + deadServersList): ""));
}
}
@@ -422,7 +424,7 @@ public class ServerManager {
LOG.info("Waiting on following regionserver(s) to go down " +
this.onlineServers.values());
try {
- this.onlineServers.wait(500);
+ this.onlineServers.wait(1000);
} catch (InterruptedException e) {
// continue
}
@@ -516,7 +518,7 @@ public class ServerManager {
HConnectionManager.getConnection(this.master.getConfiguration());
HRegionInterface hri = serverConnections.get(info.getServerName());
if (hri == null) {
- LOG.info("new connection");
+ LOG.debug("New connection to " + info.getServerName());
hri = connection.getHRegionConnection(info.getServerAddress(), false);
serverConnections.put(info.getServerName(), hri);
}
@@ -537,9 +539,10 @@ public class ServerManager {
getLong("hbase.master.wait.on.regionservers.interval", 3000);
// So, number of regionservers > 0 and its been n since last check in, break,
// else just stall here
+ int count = 0;
for (int oldcount = countOfRegionServers(); !this.master.isStopped();) {
Thread.sleep(interval);
- int count = countOfRegionServers();
+ count = countOfRegionServers();
if (count == oldcount && count > 0) break;
if (count == 0) {
LOG.info("Waiting on regionserver(s) to checkin");
@@ -548,6 +551,8 @@ public class ServerManager {
}
oldcount = count;
}
+ LOG.info("Exiting wait on regionserver(s) to checkin; count=" + count +
+ ", stopped=" + this.master.isStopped());
}
public List<HServerInfo> getOnlineServersList() {
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Mon Sep 20 19:04:41 2010
@@ -431,8 +431,8 @@ public class HRegionServer implements HR
private void initializeZooKeeper() throws IOException, InterruptedException {
// open connection to zookeeper and set primary watcher
- zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + "-"
- + serverInfo.getServerName(), this);
+ zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" +
+ serverInfo.getServerAddress().getPort(), this);
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
this.clusterStatusTracker.start();
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java?rev=999057&r1=999056&r2=999057&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java Mon Sep 20 19:04:41 2010
@@ -135,8 +135,8 @@ public class ZKAssign {
public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
String serverName, final EventType event)
throws KeeperException, KeeperException.NodeExistsException {
- LOG.debug(zkw.prefix("Creating an unassigned node for " +
- region.getEncodedName() + " in an OFFLINE state"));
+ LOG.debug(zkw.prefix("Creating unassigned node for " +
+ region.getEncodedName() + " in OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(event,
region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
@@ -167,8 +167,8 @@ public class ZKAssign {
public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
String serverName)
throws KeeperException, KeeperException.NoNodeException {
- LOG.debug(zkw.prefix("Forcing an existing unassigned node for " +
- region.getEncodedName() + " to an OFFLINE state"));
+ LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
+ region.getEncodedName() + " to OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
@@ -200,8 +200,8 @@ public class ZKAssign {
public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw,
HRegionInfo region, String serverName)
throws KeeperException {
- LOG.debug(zkw.prefix("Creating or updating an unassigned node for " +
- region.getEncodedName() + " with an OFFLINE state"));
+ LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
+ region.getEncodedName() + " with OFFLINE state"));
RegionTransitionData data = new RegionTransitionData(
EventType.M2ZK_REGION_OFFLINE, region.getRegionName(), serverName);
synchronized(zkw.getNodes()) {
@@ -319,7 +319,7 @@ public class ZKAssign {
private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
EventType expectedState)
throws KeeperException, KeeperException.NoNodeException {
- LOG.debug(zkw.prefix("Deleting an existing unassigned " +
+ LOG.debug(zkw.prefix("Deleting existing unassigned " +
"node for " + regionName + " that is in expected state " +
expectedState));
String node = getNodeName(zkw, regionName);
Stat stat = new Stat();
@@ -329,7 +329,7 @@ public class ZKAssign {
}
RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
if(!data.getEventType().equals(expectedState)) {
- LOG.warn(zkw.prefix("Attempting to delete an unassigned " +
+ LOG.warn(zkw.prefix("Attempting to delete unassigned " +
"node in " + expectedState +
" state but node is in " + data.getEventType() + " state"));
return false;
@@ -338,7 +338,7 @@ public class ZKAssign {
// TODO: Does this go here or only if we successfully delete node?
zkw.getNodes().remove(node);
if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
- LOG.warn(zkw.prefix("Attempting to delete an " +
+ LOG.warn(zkw.prefix("Attempting to delete " +
"unassigned node in " + expectedState +
" state but " +
"after verifying it was in OPENED state, we got a version mismatch"));
@@ -392,7 +392,7 @@ public class ZKAssign {
public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
String serverName)
throws KeeperException, KeeperException.NodeExistsException {
- LOG.debug(zkw.prefix("Creating an unassigned node for " +
+ LOG.debug(zkw.prefix("Creating unassigned node for " +
region.getEncodedName() + " in a CLOSING state"));
RegionTransitionData data = new RegionTransitionData(
EventType.RS2ZK_REGION_CLOSING, region.getRegionName(), serverName);