Author: stack
Date: Tue Mar 4 11:25:14 2008
New Revision: 633597

URL: http://svn.apache.org/viewvc?rev=633597&view=rev
Log:
HBASE-490 Doubly-assigned .META.; master uses one and clients another

Modified: hadoop/hbase/branches/0.1/CHANGES.txt hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java hadoop/hbase/trunk/CHANGES.txt hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java Modified: hadoop/hbase/branches/0.1/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/branches/0.1/CHANGES.txt (original) +++ hadoop/hbase/branches/0.1/CHANGES.txt Tue Mar 4 11:25:14 2008 @@ -28,6 +28,7 @@ HBASE-446 Fully qualified hbase.rootdir doesn't work HBASE-428 Under continuous upload of rows, WrongRegionExceptions are thrown that reach the client even after retries + HBASE-490 Doubly-assigned .META.; master uses one and clients another IMPROVEMENTS HADOOP-2555 Refactor the HTable#get and HTable#getRow methods to avoid Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HMaster.java Tue Mar 4 11:25:14 2008 @@ -407,11 +407,11 @@ } protected void checkAssigned(final HRegionInfo info, - final String serverName, final long startCode) throws IOException { - + final String serverName, final long startCode) 
+ throws IOException { // Skip region - if ... - if(info.isOffline() // offline - || killedRegions.contains(info.getRegionName()) // queued for offline + if (info.isOffline() // offline + || killedRegions.contains(info.getRegionName()) // queued for offline || regionsToDelete.contains(info.getRegionName())) { // queued for delete unassignedRegions.remove(info); @@ -424,9 +424,8 @@ Map<Text, HRegionInfo> regionsToKill = killList.get(serverName); if (regionsToKill != null && regionsToKill.containsKey(info.getRegionName())) { - // Skip if region is on kill list - if(LOG.isDebugEnabled()) { + if (LOG.isDebugEnabled()) { LOG.debug("not assigning region (on kill list): " + info.getRegionName()); } @@ -438,21 +437,13 @@ } /* - * If the server is not dead and either: - * the stored info is not null and the start code does not match - * or: - * the stored info is null and the region is neither unassigned nor pending - * then: + * If the server is a dead server or its startcode is off -- either null + * or doesn't match the start code for the address -- then add it to the + * list of unassigned regions IF not already there (or pending open). */ - if (!deadServer && - ((storedInfo != null && storedInfo.getStartCode() != startCode) || - (storedInfo == null && - !unassignedRegions.containsKey(info) && - !pendingRegions.contains(info.getRegionName()) - ) - ) - ) { - + if (!deadServer && !unassignedRegions.containsKey(info) && + !pendingRegions.contains(info.getRegionName()) + && (storedInfo == null || storedInfo.getStartCode() != startCode)) { // The current assignment is invalid if (LOG.isDebugEnabled()) { LOG.debug("Current assignment of " + info.getRegionName() + @@ -503,13 +494,13 @@ super(true, metaRescanInterval, closed); } + // Don't retry if we get an error while scanning. Errors are most often + // caused by the server going away. 
Wait until next rescan interval when + // things should be back to normal private boolean scanRoot() { - // Don't retry if we get an error while scanning. Errors are most often - // caused by the server going away. Wait until next rescan interval when - // things should be back to normal boolean scanSuccessful = false; synchronized (rootRegionLocation) { - while(!closed.get() && rootRegionLocation.get() == null) { + while (!closed.get() && rootRegionLocation.get() == null) { // rootRegionLocation will be filled in when we get an 'open region' // regionServerReport message from the HRegionServer that has been // allocated the ROOT region below. @@ -639,7 +630,7 @@ /** Set by root scanner to indicate the number of meta regions */ volatile AtomicInteger numberOfMetaRegions = new AtomicInteger(); - /** Work for the meta scanner is queued up here */ + /** Initial work for the meta scanner is queued up here */ volatile BlockingQueue<MetaRegion> metaRegionsToScan = new LinkedBlockingQueue<MetaRegion>(); @@ -668,10 +659,10 @@ super(false, metaRescanInterval, closed); } + // Don't retry if we get an error while scanning. Errors are most often + // caused by the server going away. Wait until next rescan interval when + // things should be back to normal private boolean scanOneMetaRegion(MetaRegion region) { - // Don't retry if we get an error while scanning. Errors are most often - // caused by the server going away. Wait until next rescan interval when - // things should be back to normal boolean scanSuccessful = false; while (!closed.get() && !rootScanned && rootRegionLocation.get() == null) { @@ -713,7 +704,12 @@ @Override protected boolean initialScan() { MetaRegion region = null; - while (!closed.get() && region == null && !metaRegionsScanned()) { + // Keep going if not closed, metaRegionsToScan has been emptied (or it + // hasn't gotten anything in it yet) and all meta regions are onlined + // (root and meta). 
+ while (!closed.get() && + (region == null || metaRegionsToScan.size() > 0) && + !metaRegionsScanned()) { try { region = metaRegionsToScan.poll(threadWakeFrequency, TimeUnit.MILLISECONDS); Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java (original) +++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/util/Sleeper.java Tue Mar 4 11:25:14 2008 @@ -44,7 +44,7 @@ * Sleep for period. */ public void sleep() { - sleep(period); + sleep(System.currentTimeMillis()); } /** Modified: hadoop/hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/trunk/CHANGES.txt (original) +++ hadoop/hbase/trunk/CHANGES.txt Tue Mar 4 11:25:14 2008 @@ -28,6 +28,7 @@ HBASE-462 Update migration tool HBASE-473 When a table is deleted, master sends multiple close messages to the region server + HBASE-490 Doubly-assigned .META.; master uses one and clients another IMPROVEMENTS HBASE-415 Rewrite leases to use DelayedBlockingQueue instead of polling Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java Tue Mar 4 11:25:14 2008 @@ -372,21 +372,13 @@ } /* - 
* If the server is not dead and either: - * the stored info is not null and the start code does not match - * or: - * the stored info is null and the region is neither unassigned nor pending - * then: + * If the server is a dead server or its startcode is off -- either null + * or doesn't match the start code for the address -- then add it to the + * list of unassigned regions IF not already there (or pending open). */ - if (!deadServer && - ((storedInfo != null && storedInfo.getStartCode() != startCode) || - (storedInfo == null && - !regionManager.isUnassigned(info) && + if (!deadServer && !regionManager.isUnassigned(info) && !regionManager.isPending(info.getRegionName()) - ) - ) - ) { - + && (storedInfo == null || storedInfo.getStartCode() != startCode)) { // The current assignment is invalid if (LOG.isDebugEnabled()) { LOG.debug("Current assignment of " + info.getRegionName() + Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java Tue Mar 4 11:25:14 2008 @@ -38,7 +38,7 @@ * action would prevent other work from getting done. */ class MetaScanner extends BaseScanner { - /** Work for the meta scanner is queued up here */ + /** Initial work for the meta scanner is queued up here */ private volatile BlockingQueue<MetaRegion> metaRegionsToScan = new LinkedBlockingQueue<MetaRegion>(); @@ -50,10 +50,10 @@ super(master, regionManager, false, master.metaRescanInterval, master.closed); } + // Don't retry if we get an error while scanning. Errors are most often + // caused by the server going away. 
Wait until next rescan interval when + // things should be back to normal private boolean scanOneMetaRegion(MetaRegion region) { - // Don't retry if we get an error while scanning. Errors are most often - // caused by the server going away. Wait until next rescan interval when - // things should be back to normal boolean scanSuccessful = false; while (!master.closed.get() && !regionManager.isInitialRootScanComplete() && regionManager.getRootRegionLocation() == null) { @@ -95,7 +95,9 @@ @Override protected boolean initialScan() { MetaRegion region = null; - while (!master.closed.get() && region == null && !metaRegionsScanned()) { + while (!master.closed.get() && + (region == null && metaRegionsToScan.size() > 0) && + !metaRegionsScanned()) { try { region = metaRegionsToScan.poll(master.threadWakeFrequency, TimeUnit.MILLISECONDS); @@ -164,4 +166,4 @@ void addMetaRegionToScan(MetaRegion m) throws InterruptedException { metaRegionsToScan.add(m); } -} \ No newline at end of file +} Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Mar 4 11:25:14 2008 @@ -598,7 +598,7 @@ public void waitForRootRegionLocation() { synchronized (rootRegionLocation) { - while(!master.closed.get() && rootRegionLocation.get() == null) { + while (!master.closed.get() && rootRegionLocation.get() == null) { // rootRegionLocation will be filled in when we get an 'open region' // regionServerReport message from the HRegionServer that has been // allocated the ROOT region below. 
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java Tue Mar 4 11:25:14 2008 @@ -31,10 +31,10 @@ super(master, regionManager, true, master.metaRescanInterval, master.closed); } + // Don't retry if we get an error while scanning. Errors are most often + // caused by the server going away. Wait until next rescan interval when + // things should be back to normal private boolean scanRoot() { - // Don't retry if we get an error while scanning. Errors are most often - // caused by the server going away. Wait until next rescan interval when - // things should be back to normal boolean scanSuccessful = false; master.waitForRootRegionLocation(); if (master.closed.get()) { @@ -71,4 +71,4 @@ protected void maintenanceScan() { scanRoot(); } -} \ No newline at end of file +} Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java?rev=633597&r1=633596&r2=633597&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Sleeper.java Tue Mar 4 11:25:14 2008 @@ -44,7 +44,7 @@ * Sleep for period. */ public void sleep() { - sleep(period); + sleep(System.currentTimeMillis()); } /** @@ -69,4 +69,4 @@ } } } -} \ No newline at end of file +}