Author: jimk Date: Tue Jun 5 08:11:57 2007 New Revision: 544512 URL: http://svn.apache.org/viewvc?view=rev&rev=544512 Log: HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease that is not held'
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-site.xml lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?view=diff&rev=544512&r1=544511&r2=544512 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Tue Jun 5 08:11:57 2007 @@ -23,3 +23,5 @@ 12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions that have shrunk in size. 13. HADOOP-1445 Support updates across region splits and compactions + 14. HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease + that is not held' Modified: lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-site.xml URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-site.xml?view=diff&rev=544512&r1=544511&r2=544512 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-site.xml (original) +++ lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-site.xml Tue Jun 5 08:11:57 2007 @@ -1,54 +1,4 @@ <?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> - <property> - <name>hbase.regiondir</name> - <value>hbase</value> - <description>The directory shared by region servers. - </description> - </property> - <property> - <name>hbase.regionserver.msginterval</name> - <value>1000</value> - <description>Interval between messages from the RegionServer to HMaster - in milliseconds. Default is 15. Set this value low if you want unit - tests to be responsive. - </description> - </property> - <!-- - <property> - <name>hbase.master.meta.thread.rescanfrequency</name> - <value>600000</value> - <description>How long the HMaster sleeps (in milliseconds) between scans of - the root and meta tables. - </description> - </property> - <property> - <name>hbase.master.lease.period</name> - <value>360000</value> - <description>HMaster server lease period in milliseconds. Default is - 180 seconds.</description> - </property> - <property> - <name>hbase.regionserver.lease.period</name> - <value>360000</value> - <description>HMaster server lease period in milliseconds. Default is - 180 seconds.</description> - </property> - --> - <!-- - <property> - <name>hbase.hregion.max.filesize</name> - <value>3421223</value> - <description> - Maximum desired file size for an HRegion. If filesize exceeds - value + (value / 2), the HRegion is split in two. Default: 128M. - </description> - </property> - <property> - <name>hbase.client.timeout.length</name> - <value>10000</value> - <description>Client timeout in milliseconds</description> - </property> - --> </configuration> Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?view=diff&rev=544512&r1=544511&r2=544512 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Tue Jun 5 08:11:57 2007 @@ -738,17 +738,17 @@ /** HRegionServers call this method upon startup. */ public void regionServerStartup(HServerInfo serverInfo) throws IOException { - String server = serverInfo.getServerAddress().toString().trim(); + String s = serverInfo.getServerAddress().toString().trim(); HServerInfo storedInfo = null; if(LOG.isDebugEnabled()) { - LOG.debug("received start message from: " + server); + LOG.debug("received start message from: " + s); } // If we get the startup message but there's an old server by that // name, then we can timeout the old one right away and register // the new one. - storedInfo = serversToServerInfo.remove(server); + storedInfo = serversToServerInfo.remove(s); if(storedInfo != null && !closed) { synchronized(msgQueue) { @@ -759,34 +759,40 @@ // Either way, record the new server - serversToServerInfo.put(server, serverInfo); + serversToServerInfo.put(s, serverInfo); if(!closed) { - Text serverLabel = new Text(server); - serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(server)); + Text serverLabel = new Text(s); + LOG.debug("Created lease for " + serverLabel); + serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(s)); } } /** HRegionServers call this method repeatedly. */ - public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) throws IOException { - String server = serverInfo.getServerAddress().toString().trim(); - Text serverLabel = new Text(server); - - if(closed - || (msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING)) { - // We're shutting down. Or the HRegionServer is. - serversToServerInfo.remove(server); - serverLeases.cancelLease(serverLabel, serverLabel); + public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) + throws IOException { + String s = serverInfo.getServerAddress().toString().trim(); + Text serverLabel = new Text(s); + + if (closed || + msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING) { + // HRegionServer is shutting down. + if (serversToServerInfo.remove(s) != null) { + // Only cancel lease once (This block can run a couple of times during + // shutdown). + LOG.debug("Cancelling lease for " + serverLabel); + serverLeases.cancelLease(serverLabel, serverLabel); + } HMsg returnMsgs[] = {new HMsg(HMsg.MSG_REGIONSERVER_STOP)}; return returnMsgs; } - HServerInfo storedInfo = serversToServerInfo.get(server); + HServerInfo storedInfo = serversToServerInfo.get(s); if(storedInfo == null) { if(LOG.isDebugEnabled()) { - LOG.debug("received server report from unknown server: " + server); + LOG.debug("received server report from unknown server: " + s); } // The HBaseMaster may have been restarted. @@ -808,7 +814,7 @@ // The answer is to ask A to shut down for good. if(LOG.isDebugEnabled()) { - LOG.debug("region server race condition detected: " + server); + LOG.debug("region server race condition detected: " + s); } HMsg returnMsgs[] = { @@ -821,11 +827,11 @@ // All's well. Renew the server's lease. // This will always succeed; otherwise, the fetch of serversToServerInfo // would have failed above. - + serverLeases.renewLease(serverLabel, serverLabel); // Refresh the info object - serversToServerInfo.put(server, serverInfo); + serversToServerInfo.put(s, serverInfo); // Next, process messages for this server return processMsgs(serverInfo, msgs); Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java?view=diff&rev=544512&r1=544511&r2=544512 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java Tue Jun 5 08:11:57 2007 @@ -22,7 +22,7 @@ import java.io.*; import java.util.*; -/******************************************************************************* +/** * Leases * * There are several server classes in HBase that need to track external clients @@ -36,9 +36,9 @@ * * An instance of the Leases class will create a thread to do its dirty work. * You should close() the instance if you want to clean up the thread properly. - ******************************************************************************/ + */ public class Leases { - private static final Log LOG = LogFactory.getLog(Leases.class); + static final Log LOG = LogFactory.getLog(Leases.class.getName()); long leasePeriod; long leaseCheckFrequency; @@ -83,22 +83,30 @@ LOG.debug("leases closed"); } } + + String getLeaseName(final Text holderId, final Text resourceId) { + return "<holderId=" + holderId + ", resourceId=" + resourceId + ">"; + } /** A client obtains a lease... */ - public void createLease(Text holderId, Text resourceId, LeaseListener listener) throws IOException { + public void createLease(Text holderId, Text resourceId, + final LeaseListener listener) + throws IOException { synchronized(leases) { synchronized(sortedLeases) { Lease lease = new Lease(holderId, resourceId, listener); Text leaseId = lease.getLeaseId(); if(leases.get(leaseId) != null) { throw new IOException("Impossible state for createLease(): Lease " + - "for holderId " + holderId + " and resourceId " + resourceId + - " is still held."); + getLeaseName(holderId, resourceId) + " is still held."); } leases.put(leaseId, lease); sortedLeases.add(lease); } } + if (LOG.isDebugEnabled()) { + LOG.debug("Created lease " + getLeaseName(holderId, resourceId)); + } } /** A client renews a lease... */ @@ -110,8 +118,8 @@ if(lease == null) { // It's possible that someone tries to renew the lease, but // it just expired a moment ago. So fail. - throw new IOException("Cannot renew lease; not held (holderId=" + - holderId + ", resourceId=" + resourceId + ")"); + throw new IOException("Cannot renew lease that is not held: " + + getLeaseName(holderId, resourceId)); } sortedLeases.remove(lease); @@ -119,9 +127,14 @@ sortedLeases.add(lease); } } + if (LOG.isDebugEnabled()) { + LOG.debug("Renewed lease " + getLeaseName(holderId, resourceId)); + } } - /** A client explicitly cancels a lease. The lease-cleanup method is not called. */ + /** A client explicitly cancels a lease. + * The lease-cleanup method is not called. + */ public void cancelLease(Text holderId, Text resourceId) throws IOException { synchronized(leases) { synchronized(sortedLeases) { @@ -132,7 +145,8 @@ // It's possible that someone tries to renew the lease, but // it just expired a moment ago. So fail. - throw new IOException("Cannot cancel lease that is not held (holderId=" + holderId + ", resourceId=" + resourceId + ")"); + throw new IOException("Cannot cancel lease that is not held: " + + getLeaseName(holderId, resourceId)); } sortedLeases.remove(lease); @@ -140,7 +154,10 @@ lease.cancelled(); } - } + } + if (LOG.isDebugEnabled()) { + LOG.debug("Cancel lease " + getLeaseName(holderId, resourceId)); + } } /** LeaseMonitor is a thread that expires Leases that go on too long. */ @@ -211,6 +228,10 @@ } public void expired() { + if (LOG.isDebugEnabled()) { + LOG.debug("Lease expired " + getLeaseName(this.holderId, + this.resourceId)); + } listener.leaseExpired(); }