Author: stack
Date: Sun Oct 10 06:04:59 2010
New Revision: 1006252

URL: http://svn.apache.org/viewvc?rev=1006252&view=rev
Log:
HBASE-3098 TestMetaReaderEditor is broken in TRUNK; hangs; part2 of the patch

Modified:
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
    
hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
    
hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
    
hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
    
hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java 
(original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java 
Sun Oct 10 06:04:59 2010
@@ -53,7 +53,8 @@ import org.apache.zookeeper.KeeperExcept
  * the location of <code>.META.</code>  If not available in 
<code>-ROOT-</code>,
  * ZooKeeper is used to monitor for a new location of <code>.META.</code>.
  *
- * <p>Call {...@link #start()} to start up operation.
+ * <p>Call {...@link #start()} to start up operation.  Call {...@link 
#stop()}} to
+ * interrupt waits and close up shop.
  */
 public class CatalogTracker {
   private static final Log LOG = LogFactory.getLog(CatalogTracker.class);
@@ -64,6 +65,7 @@ public class CatalogTracker {
   private final AtomicBoolean metaAvailable = new AtomicBoolean(false);
   private HServerAddress metaLocation;
   private final int defaultTimeout;
+  private boolean stopped = false;
 
   public static final byte [] ROOT_REGION =
     HRegionInfo.ROOT_REGIONINFO.getRegionName();
@@ -130,6 +132,22 @@ public class CatalogTracker {
   }
 
   /**
+   * Stop working.
+   * Interrupts any ongoing waits.
+   */
+  public void stop() {
+    LOG.debug("Stopping catalog tracker " + this.connection.toString() +
+      "; will interrupt blocked waits on root and meta");
+    this.stopped = true;
+    this.rootRegionTracker.stop();
+    this.metaNodeTracker.stop();
+    // Call this and it will interrupt any ongoing waits on meta.
+    synchronized (this.metaAvailable) {
+      this.metaAvailable.notifyAll();
+    }
+  }
+
+  /**
    * Gets the current location for <code>-ROOT-</code> or null if location is
    * not currently available.
    * @return location of root, null if not available
@@ -274,8 +292,8 @@ public class CatalogTracker {
    * @throws InterruptedException if interrupted while waiting
    */
   public void waitForMeta() throws InterruptedException {
-    synchronized(metaAvailable) {
-      while (!metaAvailable.get()) {
+    synchronized (metaAvailable) {
+      while (!stopped && !metaAvailable.get()) {
         metaAvailable.wait();
       }
     }
@@ -301,7 +319,7 @@ public class CatalogTracker {
       if (getMetaServerConnection(true) != null) {
         return metaLocation;
       }
-      while(!metaAvailable.get() &&
+      while(!stopped && !metaAvailable.get() &&
           (timeout == 0 || System.currentTimeMillis() < stop)) {
         metaAvailable.wait(timeout);
       }
@@ -486,4 +504,8 @@ public class CatalogTracker {
   MetaNodeTracker getMetaNodeTracker() {
     return this.metaNodeTracker;
   }
+
+  public HConnection getConnection() {
+    return this.connection;
+  }
 }
\ No newline at end of file

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java 
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java 
Sun Oct 10 06:04:59 2010
@@ -66,11 +66,6 @@ public class HBaseAdmin implements Abort
   private volatile Configuration conf;
   private final long pause;
   private final int numRetries;
-  /**
-   * Lazily instantiated.  Use {...@link #getCatalogTracker()} to ensure you 
get
-   * an instance rather than a null.
-   */
-  private CatalogTracker catalogTracker = null;
 
   /**
    * Constructor
@@ -88,21 +83,32 @@ public class HBaseAdmin implements Abort
     this.connection.getMaster();
   }
 
+  /**
+   * @return A new CatalogTracker instance; call {...@link 
#cleanupCatalogTracker(CatalogTracker)}
+   * to cleanup the returned catalog tracker.
+   * @throws ZooKeeperConnectionException
+   * @throws IOException
+   * @see #cleanupCatalogTracker(CatalogTracker);
+   */
   private synchronized CatalogTracker getCatalogTracker()
   throws ZooKeeperConnectionException, IOException {
-    if (this.catalogTracker == null) {
-      this.catalogTracker = new 
CatalogTracker(this.connection.getZooKeeperWatcher(),
-        HConnectionManager.getConnection(conf), this,
-        this.conf.getInt("hbase.admin.catalog.timeout", 10 * 1000));
-      try {
-        this.catalogTracker.start();
-      } catch (InterruptedException e) {
-        // Let it out as an IOE for now until we redo all so tolerate IEs
-        Thread.currentThread().interrupt();
-        throw new IOException("Interrupted", e);
-      }
-    }
-    return this.catalogTracker;
+    CatalogTracker ct = null;
+    try {
+      HConnection connection =
+        HConnectionManager.getConnection(new Configuration(this.conf));
+      ct = new CatalogTracker(connection);
+      ct.start();
+    } catch (InterruptedException e) {
+      // Let it out as an IOE for now until we redo all so tolerate IEs
+      Thread.currentThread().interrupt();
+      throw new IOException("Interrupted", e);
+    }
+    return ct;
+  }
+
+  private void cleanupCatalogTracker(final CatalogTracker ct) {
+    ct.stop();
+    HConnectionManager.deleteConnection(ct.getConnection());
   }
 
   @Override
@@ -142,7 +148,14 @@ public class HBaseAdmin implements Abort
    */
   public boolean tableExists(final String tableName)
   throws IOException {
-    return MetaReader.tableExists(getCatalogTracker(), tableName);
+    boolean b = false;
+    CatalogTracker ct = getCatalogTracker();
+    try {
+      b = MetaReader.tableExists(ct, tableName);
+    } finally {
+      cleanupCatalogTracker(ct);
+    }
+    return b;
   }
 
   /**
@@ -718,15 +731,20 @@ public class HBaseAdmin implements Abort
    */
   public void closeRegion(final byte [] regionname, final String hostAndPort)
   throws IOException {
-    if (hostAndPort != null) {
-      HServerAddress hsa = new HServerAddress(hostAndPort);
-      Pair<HRegionInfo, HServerAddress> pair =
-        MetaReader.getRegion(getCatalogTracker(), regionname);
-      closeRegion(hsa, pair.getFirst());
-    } else {
-      Pair<HRegionInfo, HServerAddress> pair =
-        MetaReader.getRegion(getCatalogTracker(), regionname);
-      closeRegion(pair.getSecond(), pair.getFirst());
+    CatalogTracker ct = getCatalogTracker();
+    try {
+      if (hostAndPort != null) {
+        HServerAddress hsa = new HServerAddress(hostAndPort);
+        Pair<HRegionInfo, HServerAddress> pair =
+          MetaReader.getRegion(ct, regionname);
+        closeRegion(hsa, pair.getFirst());
+      } else {
+        Pair<HRegionInfo, HServerAddress> pair =
+          MetaReader.getRegion(ct, regionname);
+        closeRegion(pair.getSecond(), pair.getFirst());
+      }
+    } finally {
+      cleanupCatalogTracker(ct);
     }
   }
 
@@ -760,17 +778,22 @@ public class HBaseAdmin implements Abort
   public void flush(final byte [] tableNameOrRegionName)
   throws IOException, InterruptedException {
     boolean isRegionName = isRegionName(tableNameOrRegionName);
-    if (isRegionName) {
-      Pair<HRegionInfo, HServerAddress> pair =
-        MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
-      flush(pair.getSecond(), pair.getFirst());
-    } else {
-      List<Pair<HRegionInfo, HServerAddress>> pairs =
-        MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
-          Bytes.toString(tableNameOrRegionName));
-      for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
+    CatalogTracker ct = getCatalogTracker();
+    try {
+      if (isRegionName) {
+        Pair<HRegionInfo, HServerAddress> pair =
+          MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
         flush(pair.getSecond(), pair.getFirst());
+      } else {
+        List<Pair<HRegionInfo, HServerAddress>> pairs =
+          MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
+              Bytes.toString(tableNameOrRegionName));
+        for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
+          flush(pair.getSecond(), pair.getFirst());
+        }
       }
+    } finally {
+      cleanupCatalogTracker(ct);
     }
   }
 
@@ -843,17 +866,22 @@ public class HBaseAdmin implements Abort
    */
   private void compact(final byte [] tableNameOrRegionName, final boolean 
major)
   throws IOException, InterruptedException {
-    if (isRegionName(tableNameOrRegionName)) {
-      Pair<HRegionInfo, HServerAddress> pair =
-        MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
-      compact(pair.getSecond(), pair.getFirst(), major);
-    } else {
-      List<Pair<HRegionInfo, HServerAddress>> pairs =
-        MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
-          Bytes.toString(tableNameOrRegionName));
-      for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
+    CatalogTracker ct = getCatalogTracker();
+    try {
+      if (isRegionName(tableNameOrRegionName)) {
+        Pair<HRegionInfo, HServerAddress> pair =
+          MetaReader.getRegion(ct, tableNameOrRegionName);
         compact(pair.getSecond(), pair.getFirst(), major);
+      } else {
+        List<Pair<HRegionInfo, HServerAddress>> pairs =
+          MetaReader.getTableRegionsAndLocations(ct,
+              Bytes.toString(tableNameOrRegionName));
+        for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
+          compact(pair.getSecond(), pair.getFirst(), major);
+        }
       }
+    } finally {
+      cleanupCatalogTracker(ct);
     }
   }
 
@@ -920,19 +948,25 @@ public class HBaseAdmin implements Abort
    * @throws IOException if a remote or network exception occurs
    * @throws InterruptedException 
    */
-  public void split(final byte [] tableNameOrRegionName) throws IOException, 
InterruptedException {
-    if (isRegionName(tableNameOrRegionName)) {
-      // Its a possible region name.
-      Pair<HRegionInfo, HServerAddress> pair =
-        MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
-      split(pair.getSecond(), pair.getFirst());
-    } else {
-      List<Pair<HRegionInfo, HServerAddress>> pairs =
-        MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
-          Bytes.toString(tableNameOrRegionName));
-      for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
+  public void split(final byte [] tableNameOrRegionName)
+  throws IOException, InterruptedException {
+    CatalogTracker ct = getCatalogTracker();
+    try {
+      if (isRegionName(tableNameOrRegionName)) {
+        // Its a possible region name.
+        Pair<HRegionInfo, HServerAddress> pair =
+          MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
         split(pair.getSecond(), pair.getFirst());
+      } else {
+        List<Pair<HRegionInfo, HServerAddress>> pairs =
+          MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
+              Bytes.toString(tableNameOrRegionName));
+        for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
+          split(pair.getSecond(), pair.getFirst());
+        }
       }
+    } finally {
+      cleanupCatalogTracker(ct);
     }
   }
 

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnection.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnection.java 
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnection.java 
Sun Oct 10 06:04:59 2010
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
@@ -42,6 +43,11 @@ import org.apache.hadoop.hbase.zookeeper
  */
 public interface HConnection extends Abortable {
   /**
+   * @return Configuration instance being used by this HConnection instance.
+   */
+  public Configuration getConfiguration();
+
+  /**
    * Retrieve ZooKeeperWatcher used by the connection.
    * @return ZooKeeperWatcher handle being used by the connection.
    * @throws IOException if a remote or network exception occurs

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
 Sun Oct 10 06:04:59 2010
@@ -140,6 +140,14 @@ public class HConnectionManager {
   }
 
   /**
+   * Delete connection information for the instance
+   * @param connection configuration
+   */
+  public static void deleteConnection(HConnection connection) {
+    deleteConnection(connection.getConfiguration(), false);
+  }
+
+  /**
    * Delete information for all connections.
    * @param stopProxy stop the proxy as well
    * @throws IOException
@@ -231,17 +239,12 @@ public class HConnectionManager {
     public HConnectionImplementation(Configuration conf)
     throws ZooKeeperConnectionException {
       this.conf = conf;
-
-      String serverClassName =
-        conf.get(HConstants.REGION_SERVER_CLASS,
-            HConstants.DEFAULT_REGION_SERVER_CLASS);
-
+      String serverClassName = conf.get(HConstants.REGION_SERVER_CLASS,
+        HConstants.DEFAULT_REGION_SERVER_CLASS);
       this.closed = false;
-
       try {
         this.serverInterfaceClass =
           (Class<? extends HRegionInterface>) Class.forName(serverClassName);
-
       } catch (ClassNotFoundException e) {
         throw new UnsupportedOperationException(
             "Unable to find region server interface " + serverClassName, e);
@@ -271,6 +274,10 @@ public class HConnectionManager {
       this.masterChecked = false;
     }
 
+    public Configuration getConfiguration() {
+      return this.conf;
+    }
+
     @Override
     public String toString() {
       return this.identifier;

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Sun 
Oct 10 06:04:59 2010
@@ -287,6 +287,7 @@ implements HMasterInterface, HMasterRegi
 
       // Stop services started for both backup and active masters
       this.activeMasterManager.stop();
+      this.catalogTracker.stop();
       HConnectionManager.deleteConnection(this.conf, true);
       this.zooKeeper.close();
       LOG.info("HMaster main thread exiting");

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 Sun Oct 10 06:04:59 2010
@@ -592,6 +592,9 @@ public class HRegionServer implements HR
       closeAllScanners();
       LOG.info("stopping server at: " + this.serverInfo.getServerName());
     }
+    // Interrupt catalog tracker here in case any regions being opened out in
+    // handlers are stuck waiting on meta or root.
+    this.catalogTracker.stop();
     waitOnAllRegionsToClose();
 
     // Make sure the proxy is down.
@@ -1240,8 +1243,6 @@ public class HRegionServer implements HR
         r.hasReferences()? "Region has references on open" :
           "Region has too many store files");
     }
-    // Add to online regions
-    addToOnlineRegions(r);
     // Update ZK, ROOT or META
     if (r.getRegionInfo().isRootRegion()) {
       RootLocationEditor.setRootLocation(getZooKeeper(),
@@ -1256,6 +1257,8 @@ public class HRegionServer implements HR
         MetaEditor.updateRegionLocation(ct, r.getRegionInfo(), 
getServerInfo());
       }
     }
+    // Add to online regions if all above was successful.
+    addToOnlineRegions(r);
   }
 
   /**

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
 Sun Oct 10 06:04:59 2010
@@ -65,7 +65,8 @@ public class OpenRegionHandler extends E
 
   @Override
   public void process() throws IOException {
-    LOG.debug("Processing open of " + regionInfo.getRegionNameAsString());
+    final String name = regionInfo.getRegionNameAsString();
+    LOG.debug("Processing open of " + name);
     final String encodedName = regionInfo.getEncodedName();
 
     // TODO: Previously we would check for root region availability (but only 
that it
@@ -76,7 +77,7 @@ public class OpenRegionHandler extends E
     // Check that this region is not already online
     HRegion region = this.rsServices.getFromOnlineRegions(encodedName);
     if (region != null) {
-      LOG.warn("Attempting open of " + regionInfo.getRegionNameAsString() +
+      LOG.warn("Attempting open of " + name +
         " but it's already online on this server");
       return;
     }
@@ -89,22 +90,22 @@ public class OpenRegionHandler extends E
     try {
       // Instantiate the region.  This also periodically updates OPENING.
       region = HRegion.openHRegion(regionInfo, this.rsServices.getWAL(),
-          server.getConfiguration(), this.rsServices.getFlushRequester(),
-          new Progressable() {
-            public void progress() {
-              try {
-                int vsn = ZKAssign.retransitionNodeOpening(
-                    server.getZooKeeper(), regionInfo, server.getServerName(),
-                    openingInteger.get());
-                if (vsn == -1) {
-                  throw KeeperException.create(Code.BADVERSION);
-                }
-                openingInteger.set(vsn);
-              } catch (KeeperException e) {
-                server.abort("ZK exception refreshing OPENING node", e);
+        server.getConfiguration(), this.rsServices.getFlushRequester(),
+        new Progressable() {
+          public void progress() {
+            try {
+              int vsn = ZKAssign.retransitionNodeOpening(
+                  server.getZooKeeper(), regionInfo, server.getServerName(),
+                  openingInteger.get());
+              if (vsn == -1) {
+                throw KeeperException.create(Code.BADVERSION);
               }
+              openingInteger.set(vsn);
+            } catch (KeeperException e) {
+              server.abort("ZK exception refreshing OPENING node; " + name, e);
             }
-      });
+          }
+        });
     } catch (IOException e) {
       LOG.error("IOException instantiating region for " + regionInfo +
         "; resetting state of transition node from OPENING to OFFLINE");
@@ -115,29 +116,34 @@ public class OpenRegionHandler extends E
         ZKAssign.forceNodeOffline(server.getZooKeeper(), regionInfo,
             server.getServerName());
       } catch (KeeperException e1) {
-        LOG.error("Error forcing node back to OFFLINE from OPENING");
-        return;
+        LOG.error("Error forcing node back to OFFLINE from OPENING; " + name);
       }
       return;
     }
+    // Region is now open. Close it if error.
 
     // Re-transition node to OPENING again to verify no one has stomped on us
     openingVersion = openingInteger.get();
     try {
-      if((openingVersion = ZKAssign.retransitionNodeOpening(
+      if ((openingVersion = ZKAssign.retransitionNodeOpening(
           server.getZooKeeper(), regionInfo, server.getServerName(),
           openingVersion)) == -1) {
-        LOG.warn("Completed the OPEN of a region but when transitioning from " 
+
-            " OPENING to OPENING got a version mismatch, someone else clashed 
" +
-            "so now unassigning");
-        region.close();
+        LOG.warn("Completed the OPEN of region " + name +
+          " but when transitioning from " +
+          " OPENING to OPENING got a version mismatch, someone else clashed " +
+          "-- closing region");
+        cleanupFailedOpen(region);
         return;
       }
     } catch (KeeperException e) {
-      LOG.error("Failed transitioning node from OPENING to OPENED", e);
+      LOG.error("Failed transitioning node " + name +
+        " from OPENING to OPENED -- closing region", e);
+      cleanupFailedOpen(region);
       return;
     } catch (IOException e) {
-      LOG.error("Failed to close region after failing to transition", e);
+      LOG.error("Failed to close region " + name +
+        " after failing to transition -- closing region", e);
+      cleanupFailedOpen(region);
       return;
     }
 
@@ -146,33 +152,48 @@ public class OpenRegionHandler extends E
       this.rsServices.postOpenDeployTasks(region,
         this.server.getCatalogTracker(), false);
     } catch (IOException e) {
-      // TODO: rollback the open?
-      LOG.error("Error updating region location in catalog table", e);
+      LOG.error("Error updating " + name + " location in catalog table -- " +
+        "closing region", e);
+      cleanupFailedOpen(region);
+      return;
     } catch (KeeperException e) {
       // TODO: rollback the open?
-      LOG.error("ZK Error updating region location in catalog table", e);
+      LOG.error("ZK Error updating " + name + " location in catalog " +
+        "table -- closing region", e);
+      cleanupFailedOpen(region);
+      return;
     }
 
     // Finally, Transition ZK node to OPENED
     try {
       if (ZKAssign.transitionNodeOpened(server.getZooKeeper(), regionInfo,
           server.getServerName(), openingVersion) == -1) {
-        LOG.warn("Completed the OPEN of a region but when transitioning from " 
+
-            " OPENING to OPENED got a version mismatch, someone else clashed " 
+
-            "so now unassigning");
-        region.close();
+        LOG.warn("Completed the OPEN of region " + name +
+          " but when transitioning from " +
+          " OPENING to OPENED got a version mismatch, someone else clashed " +
+          "so now unassigning -- closing region");
+        cleanupFailedOpen(region);
         return;
       }
     } catch (KeeperException e) {
-      LOG.error("Failed transitioning node from OPENING to OPENED", e);
+      LOG.error("Failed transitioning node " + name +
+        " from OPENING to OPENED -- closing region", e);
+      cleanupFailedOpen(region);
       return;
     } catch (IOException e) {
-      LOG.error("Failed to close region after failing to transition", e);
+      LOG.error("Failed to close " + name +
+        " after failing to transition -- closing region", e);
+      cleanupFailedOpen(region);
       return;
     }
 
     // Done!  Successful region open
-    LOG.debug("Opened " + region.getRegionNameAsString());
+    LOG.debug("Opened " + name);
+  }
+
+  private void cleanupFailedOpen(final HRegion region) throws IOException {
+    if (region != null) region.close();
+    this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
   }
 
   int transitionZookeeperOfflineToOpening(final String encodedName) {

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
 (original)
+++ 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
 Sun Oct 10 06:04:59 2010
@@ -41,6 +41,8 @@ public abstract class ZooKeeperNodeTrack
   /** Used to abort if a fatal error occurs */
   protected final Abortable abortable;
 
+  private boolean stopped = false;
+
   /**
    * Constructs a new ZK node tracker.
    *
@@ -81,6 +83,11 @@ public abstract class ZooKeeperNodeTrack
     }
   }
 
+  public synchronized void stop() {
+    this.stopped = true;
+    notifyAll();
+  }
+
   /**
    * Gets the data of the node, blocking until the node is available.
    *
@@ -107,7 +114,7 @@ public abstract class ZooKeeperNodeTrack
     boolean notimeout = timeout == 0;
     long startTime = System.currentTimeMillis();
     long remaining = timeout;
-    while ((notimeout || remaining > 0) && this.data == null) {
+    while (!this.stopped && (notimeout || remaining > 0) && this.data == null) 
{
       if (notimeout) {
         wait();
         continue;

Modified: 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
 (original)
+++ 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
 Sun Oct 10 06:04:59 2010
@@ -19,6 +19,8 @@
  */
 package org.apache.hadoop.hbase.catalog;
 
+import static org.junit.Assert.assertTrue;
+
 import java.io.IOException;
 import java.net.ConnectException;
 import java.util.ArrayList;
@@ -33,11 +35,11 @@ import org.apache.hadoop.hbase.Abortable
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
-import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.Result;
@@ -102,6 +104,37 @@ public class TestCatalogTracker {
     return ct;
   }
 
+  /**
+   * Test interruptable while blocking wait on root and meta.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test public void testInterruptWaitOnMetaAndRoot()
+  throws IOException, InterruptedException {
+    final CatalogTracker ct = constructAndStartCatalogTracker();
+    HServerAddress hsa = ct.getRootLocation();
+    Assert.assertNull(hsa);
+    HServerAddress meta = ct.getMetaLocation();
+    Assert.assertNull(meta);
+    Thread t = new Thread() {
+      @Override
+      public void run() {
+        try {
+          ct.waitForMeta();
+        } catch (InterruptedException e) {
+          throw new RuntimeException("Interrupted", e);
+        }
+      }
+    };
+    t.start();
+    while (!t.isAlive()) Threads.sleep(1);
+    Threads.sleep(1);
+    assertTrue(t.isAlive());
+    ct.stop();
+    // Join the thread... should exit shortly.
+    t.join();
+  }
+
   @Test public void testGetMetaServerConnectionFails()
   throws IOException, InterruptedException, KeeperException {
     HConnection connection = Mockito.mock(HConnection.class);
@@ -292,7 +325,7 @@ public class TestCatalogTracker {
       try {
         doWaiting();
       } catch (InterruptedException e) {
-        throw new RuntimeException("Failed wait on root", e);
+        throw new RuntimeException("Failed wait", e);
       }
       LOG.info("Exiting " + getName());
     }

Modified: 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
 (original)
+++ 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
 Sun Oct 10 06:04:59 2010
@@ -26,6 +26,7 @@ import java.util.NavigableMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -50,9 +51,7 @@ import org.apache.hadoop.mapreduce.lib.o
  * a particular cell, and write it back to the table.
  */
 public class TestTableMapReduce extends MultiRegionTable {
-
   private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
-
   static final String MULTI_REGION_TABLE_NAME = "mrtest";
   static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
   static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
@@ -112,17 +111,16 @@ public class TestTableMapReduce extends 
    */
   public void testMultiRegionTable()
   throws IOException, InterruptedException, ClassNotFoundException {
-    runTestOnTable(new HTable(conf, MULTI_REGION_TABLE_NAME));
+    runTestOnTable(new HTable(new Configuration(conf), 
MULTI_REGION_TABLE_NAME));
   }
 
   private void runTestOnTable(HTable table)
   throws IOException, InterruptedException, ClassNotFoundException {
     MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);
-
     Job job = null;
     try {
       LOG.info("Before map/reduce startup");
-      job = new Job(conf, "process column contents");
+      job = new Job(table.getConfiguration(), "process column contents");
       job.setNumReduceTasks(1);
       Scan scan = new Scan();
       scan.addFamily(INPUT_FAMILY);
@@ -150,7 +148,7 @@ public class TestTableMapReduce extends 
   }
 
   private void verify(String tableName) throws IOException {
-    HTable table = new HTable(conf, tableName);
+    HTable table = new HTable(new Configuration(conf), tableName);
     boolean verified = false;
     long pause = conf.getLong("hbase.client.pause", 5 * 1000);
     int numRetries = conf.getInt("hbase.client.retries.number", 5);

Modified: 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java?rev=1006252&r1=1006251&r2=1006252&view=diff
==============================================================================
--- 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
 (original)
+++ 
hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
 Sun Oct 10 06:04:59 2010
@@ -25,6 +25,7 @@ import static org.junit.Assert.assertNot
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
 import java.util.Random;
 import java.util.concurrent.Semaphore;
 
@@ -34,6 +35,7 @@ import org.apache.hadoop.hbase.Abortable
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import 
org.apache.hadoop.hbase.master.TestActiveMasterManager.NodeDeletionListener;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
@@ -60,9 +62,36 @@ public class TestZooKeeperNodeTracker {
     TEST_UTIL.shutdownMiniZKCluster();
   }
 
+  /**
+   * Test that we can interrupt a node that is blocked on a wait.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test public void testInterruptible() throws IOException, 
InterruptedException {
+    Abortable abortable = new StubAbortable();
+    ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
+      "testInterruptible", abortable);
+    final TestTracker tracker = new TestTracker(zk, "/xyz", abortable);
+    tracker.start();
+    Thread t = new Thread() {
+      @Override
+      public void run() {
+        try {
+          tracker.blockUntilAvailable();
+        } catch (InterruptedException e) {
+          throw new RuntimeException("Interrupted", e);
+        }
+      }
+    };
+    t.start();
+    while (!t.isAlive()) Threads.sleep(1);
+    tracker.stop();
+    t.join();
+    // If it wasn't interruptible, we'd never get to here.
+  }
+
   @Test
   public void testNodeTracker() throws Exception {
-
     Abortable abortable = new StubAbortable();
     ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
         "testNodeTracker", abortable);
@@ -209,7 +238,6 @@ public class TestZooKeeperNodeTracker {
   }
 
   public static class TestTracker extends ZooKeeperNodeTracker {
-
     public TestTracker(ZooKeeperWatcher watcher, String node,
         Abortable abortable) {
       super(watcher, node, abortable);


Reply via email to