Author: stack Date: Sat Aug 18 11:04:53 2007 New Revision: 567308 URL: http://svn.apache.org/viewvc?view=rev&rev=567308 Log: HADOOP-1730 unexpected null value causes META scanner to exit (silently)
Added handling for a legal null value encountered while scanning the META table, and added logging of unexpected exceptions that arise while scanning. M src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java Refactored to do a staged removal of daughter references. (compact, recalibrate): Added. (getSplitParent): Refactored as getSplitParentInfo. M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConnectionManager.java Added formatting of the find table result string so that it is shorter (with 30-odd regions, its output fills the page). M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java Formatting to clean up Eclipse warnings. M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java The split column in a parent meta table entry can be null (this happens when a daughter split no longer has references -- it removes its entry from the parent). Added handling for this and cleaned up the split management code. Added logging of unexpected exceptions thrown while scanning a region. M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Added fix for NPE when a client asks for a scanner but passes non-existent columns. 
M src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/Writables.java (getHRegionInfo, getHRegionInfoOrNull): Added.: Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConnectionManager.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/Writables.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Sat Aug 18 11:04:53 2007 @@ -11,6 +11,7 @@ BUG FIXES HADOOP-1729 Recent renaming or META tables breaks hbase shell + HADOOP-1730 unexpected null value causes META scanner to exit (silently) IMPROVEMENTS Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConnectionManager.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConnectionManager.java?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConnectionManager.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConnectionManager.java Sat Aug 18 11:04:53 2007 @@ -46,7 +46,12 @@ * multiple HBase instances */ public class HConnectionManager implements HConstants { - private 
HConnectionManager() {} // Not instantiable + /* + * Private. Not instantiable. + */ + private HConnectionManager() { + super(); + } // A Map of master HServerAddress -> connection information for that instance // Note that although the Map is synchronized, the objects it contains @@ -298,7 +303,6 @@ } SortedMap<Text, HRegionLocation> servers = new TreeMap<Text, HRegionLocation>(); - servers.putAll(tableServers); return servers; } @@ -306,20 +310,30 @@ /** [EMAIL PROTECTED] */ public SortedMap<Text, HRegionLocation> reloadTableServers(final Text tableName) throws IOException { - closedTables.remove(tableName); - - SortedMap<Text, HRegionLocation> servers = + SortedMap<Text, HRegionLocation> tableServers = new TreeMap<Text, HRegionLocation>(); - // Reload information for the whole table - - servers.putAll(findServersForTable(tableName)); + tableServers.putAll(findServersForTable(tableName)); if (LOG.isDebugEnabled()) { - LOG.debug("Result of findTable: " + servers.toString()); + StringBuilder sb = new StringBuilder(); + int count = 0; + for (HRegionLocation location: tableServers.values()) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(count++); + sb.append(". 
"); + sb.append("address="); + sb.append(location.getServerAddress()); + sb.append(", "); + sb.append(location.getRegionInfo().getRegionName()); + } + LOG.debug("Result of findTable on " + tableName.toString() + + ": " + sb.toString()); } - return servers; + return tableServers; } /** [EMAIL PROTECTED] */ @@ -413,7 +427,7 @@ } } - SortedMap<Text, HRegionLocation> servers = + SortedMap<Text, HRegionLocation> srvrs = new TreeMap<Text, HRegionLocation>(); if (tableName.equals(ROOT_TABLE_NAME)) { @@ -428,7 +442,7 @@ if (tableServers == null) { tableServers = locateRootRegion(); } - servers.putAll(tableServers); + srvrs.putAll(tableServers); } } else if (tableName.equals(META_TABLE_NAME)) { @@ -459,7 +473,7 @@ } } } - servers.putAll(tableServers); + srvrs.putAll(tableServers); } } else { boolean waited = false; @@ -486,7 +500,7 @@ if (tableServers == null) { throw new TableNotFoundException("table not found: " + tableName); } - servers.putAll(tableServers); + srvrs.putAll(tableServers); } } if (!waited) { @@ -504,7 +518,7 @@ for (HRegionLocation t: metaServers.values()) { try { - servers.putAll(scanOneMetaRegion(t, tableName)); + srvrs.putAll(scanOneMetaRegion(t, tableName)); } catch (IOException e) { if (tries < numRetries - 1) { @@ -528,15 +542,8 @@ } } } - this.tablesToServers.put(tableName, servers); - if (LOG.isDebugEnabled()) { - int count = 0; - for (Map.Entry<Text, HRegionLocation> e: servers.entrySet()) { - LOG.debug("Region " + (1 + count++) + " of " + servers.size() + - ": " + e.getValue()); - } - } - return servers; + this.tablesToServers.put(tableName, srvrs); + return srvrs; } /* @@ -598,7 +605,6 @@ try { rootRegion.getRegionInfo(HGlobals.rootRegionInfo.regionName); break; - } catch (IOException e) { if (tries == numRetries - 1) { // Don't bother sleeping. We've run out of retries. 
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Sat Aug 18 11:04:53 2007 @@ -262,31 +262,26 @@ } } - // Scan is finished. Take a look at split parents to see if any we can clean up. - + // Scan is finished. Take a look at split parents to see if any we can + // clean up. if (splitParents.size() > 0) { for (Map.Entry<HRegionInfo, SortedMap<Text, byte[]>> e: - splitParents.entrySet()) { - - SortedMap<Text, byte[]> results = e.getValue(); - cleanupSplits(region.regionName, regionServer, e.getKey(), - (HRegionInfo) Writables.getWritable(results.get(COL_SPLITA), - new HRegionInfo()), - (HRegionInfo) Writables.getWritable(results.get(COL_SPLITB), - new HRegionInfo())); + splitParents.entrySet()) { + HRegionInfo hri = e.getKey(); + cleanupSplits(region.regionName, regionServer, hri, e.getValue()); } } LOG.info(Thread.currentThread().getName() + " scan of meta region " + region.regionName + " complete"); } + /* + * @param info Region to check. + * @return True if this is a split parent. + */ private boolean isSplitParent(final HRegionInfo info) { - boolean result = false; - - // Skip if not a split region. - if (!info.isSplit()) { - return result; + return false; } if (!info.isOffline()) { LOG.warn("Region is split but not offline: " + info.regionName); @@ -294,77 +289,87 @@ return true; } - /** - * @param metaRegionName + /* + * If daughters no longer hold reference to the parents, delete the parent. + * @param metaRegionName Meta region name. 
* @param server HRegionInterface of meta server to talk to - * @param info HRegionInfo of split parent - * @param splitA low key range child region - * @param splitB upper key range child region - * @return True if we removed <code>info</code> and this region has - * been cleaned up. + * @param parent HRegionInfo of split parent + * @param rowContent Content of <code>parent</code> row in + * <code>metaRegionName</code> + * @return True if we removed <code>parent</code> from meta table and from + * the filesystem. * @throws IOException */ private boolean cleanupSplits(final Text metaRegionName, - final HRegionInterface server, final HRegionInfo info, - final HRegionInfo splitA, final HRegionInfo splitB) throws IOException { - + final HRegionInterface srvr, final HRegionInfo parent, + SortedMap<Text, byte[]> rowContent) + throws IOException { boolean result = false; if (LOG.isDebugEnabled()) { - LOG.debug("Checking " + info.getRegionName() + " to see if daughter " + - "splits still hold references"); + LOG.debug("Checking " + parent.getRegionName() + + " to see if daughter splits still hold references"); } - boolean noReferencesA = splitA == null; - boolean noReferencesB = splitB == null; + + boolean hasReferencesA = hasReferences(metaRegionName, srvr, + parent.getRegionName(), rowContent, COL_SPLITA); + boolean hasReferencesB = hasReferences(metaRegionName, srvr, + parent.getRegionName(), rowContent, COL_SPLITB); - if (!noReferencesA) { - noReferencesA = hasReferences(metaRegionName, server, - info.getRegionName(), splitA, COL_SPLITA); - } - if (!noReferencesB) { - noReferencesB = hasReferences(metaRegionName, server, - info.getRegionName(), splitB, COL_SPLITB); - } - if (!noReferencesA && !noReferencesB) { - // No references. Remove this item from table and deleted region on - // disk. 
- LOG.info("Deleting region " + info.getRegionName() + + if (!hasReferencesA && !hasReferencesB) { + LOG.info("Deleting region " + parent.getRegionName() + " because daughter splits no longer hold references"); - - if (!HRegion.deleteRegion(fs, dir, info.getRegionName())) { - LOG.warn("Deletion of " + info.getRegionName() + " failed"); + + if (!HRegion.deleteRegion(fs, dir, parent.getRegionName())) { + LOG.warn("Deletion of " + parent.getRegionName() + " failed"); } BatchUpdate b = new BatchUpdate(); - long lockid = b.startUpdate(info.getRegionName()); + long lockid = b.startUpdate(parent.getRegionName()); b.delete(lockid, COL_REGIONINFO); b.delete(lockid, COL_SERVER); b.delete(lockid, COL_STARTCODE); - server.batchUpdate(metaRegionName, System.currentTimeMillis(), b); + srvr.batchUpdate(metaRegionName, System.currentTimeMillis(), b); result = true; } if (LOG.isDebugEnabled()) { - LOG.debug("Done checking " + info.getRegionName() + ": splitA: " + - noReferencesA + ", splitB: "+ noReferencesB); + LOG.debug("Done checking " + parent.getRegionName() + ": splitA: " + + hasReferencesA + ", splitB: "+ hasReferencesB); } return result; } - + + /* + * Checks if a daughter region -- either splitA or splitB -- still holds + * references to parent. If not, removes reference to the split from + * the parent meta region row. + * @param metaRegionName Name of meta region to look in. + * @param srvr Where region resides. + * @param parent Parent region name. + * @param rowContent Keyed content of the parent row in meta region. + * @param splitColumn Column name of daughter split to examine + * @return True if still has references to parent. 
+ * @throws IOException + */ protected boolean hasReferences(final Text metaRegionName, - final HRegionInterface server, final Text regionName, - final HRegionInfo split, final Text column) throws IOException { - + final HRegionInterface srvr, final Text parent, + SortedMap<Text, byte[]> rowContent, final Text splitColumn) + throws IOException { boolean result = false; + HRegionInfo split = + Writables.getHRegionInfoOrNull(rowContent.get(splitColumn)); + if (split == null) { + return result; + } for (Text family: split.getTableDesc().families().keySet()) { Path p = HStoreFile.getMapDir(fs.makeQualified(dir), split.getRegionName(), HStoreKey.extractFamily(family)); - - // Look for reference files. - + // Look for reference files. Call listPaths with an anonymous + // instance of PathFilter. Path [] ps = fs.listPaths(p, new PathFilter () { - public boolean accept(Path p) { - return HStoreFile.isReference(p); + public boolean accept(Path path) { + return HStoreFile.isReference(path); } } ); @@ -381,13 +386,13 @@ if (LOG.isDebugEnabled()) { LOG.debug(split.getRegionName().toString() - +" no longer has references to " + regionName.toString()); + +" no longer has references to " + parent.toString()); } BatchUpdate b = new BatchUpdate(); - long lockid = b.startUpdate(regionName); - b.delete(lockid, column); - server.batchUpdate(metaRegionName, System.currentTimeMillis(), b); + long lockid = b.startUpdate(parent); + b.delete(lockid, splitColumn); + srvr.batchUpdate(metaRegionName, System.currentTimeMillis(), b); return result; } @@ -468,7 +473,6 @@ HGlobals.rootRegionInfo.regionName, null)); } break; - } catch (IOException e) { if (e instanceof RemoteException) { try { @@ -485,6 +489,10 @@ } else { LOG.error("Scan ROOT region", e); } + } catch (Exception e) { + // If for some reason we get some other kind of exception, + // at least log it rather than go out silently. 
+ LOG.error("Unexpected exception", e); } if (!closed) { // sleep before retry @@ -597,19 +605,16 @@ try { // Don't interrupt us while we're working - synchronized (metaScannerLock) { scanRegion(region); onlineMetaRegions.put(region.startKey, region); } break; - } catch (IOException e) { if (e instanceof RemoteException) { try { e = RemoteExceptionHandler.decodeRemoteException( (RemoteException) e); - } catch (IOException ex) { e = ex; } @@ -620,10 +625,14 @@ } else { LOG.error("Scan one META region", e); } + } catch (Exception e) { + // If for some reason we get some other kind of exception, + // at least log it rather than go out silently. + LOG.error("Unexpected exception", e); } + if (!closed) { // sleep before retry - try { Thread.sleep(threadWakeFrequency); } catch (InterruptedException e) { Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Sat Aug 18 11:04:53 2007 @@ -20,7 +20,9 @@ package org.apache.hadoop.hbase; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.TreeMap; @@ -971,7 +973,8 @@ * @throws IOException */ public HInternalScannerInterface getScanner(Text[] cols, Text firstRow, - long timestamp, RowFilterInterface filter) throws IOException { + long timestamp, RowFilterInterface filter) + throws IOException { lock.obtainReadLock(); try { TreeSet<Text> families = new TreeSet<Text>(); @@ -979,12 +982,16 @@ families.add(HStoreKey.extractFamily(cols[i])); } - HStore[] 
storelist = new HStore[families.size()]; - int i = 0; + List<HStore> storelist = new ArrayList<HStore>(); for (Text family: families) { - storelist[i++] = stores.get(family); + HStore s = stores.get(family); + if (s == null) { + continue; + } + storelist.add(stores.get(family)); } - return new HScanner(cols, firstRow, timestamp, memcache, storelist, filter); + return new HScanner(cols, firstRow, timestamp, memcache, + storelist.toArray(new HStore [] {}), filter); } finally { lock.releaseReadLock(); } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java Sat Aug 18 11:04:53 2007 @@ -586,11 +586,10 @@ HRegionLocation r = getRegionLocation(batch.getRow()); HRegionInterface server = connection.getHRegionConnection(r.getServerAddress()); - try { - server.batchUpdate(r.getRegionInfo().getRegionName(), timestamp, batch); + server.batchUpdate(r.getRegionInfo().getRegionName(), timestamp, + batch); break; - } catch (IOException e) { if (e instanceof RemoteException) { e = RemoteExceptionHandler.decodeRemoteException( @@ -601,7 +600,6 @@ LOG.debug("reloading table servers because: " + e.getMessage()); } tableServers = connection.reloadTableServers(tableName); - } else { throw e; } @@ -628,6 +626,7 @@ */ @Deprecated public synchronized void renewLease(@SuppressWarnings("unused") long lockid) { + // noop } /** Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/Writables.java URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/Writables.java?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/Writables.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/Writables.java Sat Aug 18 11:04:53 2007 @@ -29,6 +29,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; /** * Utility class with methods for manipulating Writable objects @@ -88,6 +89,28 @@ } finally { in.close(); } + } + + /** + * @param bytes + * @return A HRegionInfo instance built out of passed <code>bytes</code>. + * @throws IOException + */ + public static HRegionInfo getHRegionInfo(final byte [] bytes) + throws IOException { + return (HRegionInfo)getWritable(bytes, new HRegionInfo()); + } + + /** + * @param bytes + * @return A HRegionInfo instance built out of passed <code>bytes</code> + * or <code>null</code> if passed bytes are null or an empty array. + * @throws IOException + */ + public static HRegionInfo getHRegionInfoOrNull(final byte [] bytes) + throws IOException { + return (bytes == null || bytes.length <= 0)? 
+ (HRegionInfo)null: getHRegionInfo(bytes); } /** Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java?view=diff&rev=567308&r1=567307&r2=567308 ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java Sat Aug 18 11:04:53 2007 @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ConcurrentModificationException; +import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; @@ -168,18 +169,17 @@ int count = count(meta, HConstants.COLUMN_FAMILY_STR); t = new HTable(this.conf, new Text(getName())); addContent(new HTableLoader(t), COLFAMILY_NAME3); - // All is running in the one JVM so I should be able to get the + // All is running in the one JVM so I should be able to get the single // region instance and bring on a split. HRegionInfo hri = t.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo(); - HRegion r = - cluster.regionThreads.get(0).getRegionServer().onlineRegions.get( - hri.getRegionName()); + HRegion r = cluster.regionThreads.get(0).getRegionServer(). + onlineRegions.get(hri.getRegionName()); // Flush will provoke a split next time the split-checker thread runs. r.flushcache(false); // Now, wait until split makes it into the meta table. for (int i = 0; i < retries && - (count(meta, HConstants.COLUMN_FAMILY_STR) <= count); i++) { + (count(meta, HConstants.COLUMN_FAMILY_STR) <= count); i++) { Thread.sleep(5000); } int oldCount = count; @@ -187,67 +187,45 @@ if (count <= oldCount) { throw new IOException("Failed waiting on splits to show up"); } - HRegionInfo parent = getSplitParent(meta); + // Get info on the parent from the meta table. Pass in 'hri'. 
Its the + // region we have been dealing with up to this. Its the parent of the + // region split. + Map<Text, byte []> data = getSplitParentInfo(meta, hri); + HRegionInfo parent = + Writables.getHRegionInfoOrNull(data.get(HConstants.COL_REGIONINFO)); assertTrue(parent.isOffline()); + assertTrue(parent.isSplit()); + HRegionInfo splitA = + Writables.getHRegionInfoOrNull(data.get(HConstants.COL_SPLITA)); + HRegionInfo splitB = + Writables.getHRegionInfoOrNull(data.get(HConstants.COL_SPLITB)); Path parentDir = HRegion.getRegionDir(d, parent.getRegionName()); assertTrue(fs.exists(parentDir)); - LOG.info("Split happened and parent " + parent.getRegionName() + " is " + - "offline"); - for (int i = 0; i < retries; i++) { - // Now open a scanner on the table. This will force HTable to recalibrate - // and in doing so, will force us to wait until the new child regions - // come on-line (since they are no longer automatically served by the - // HRegionServer that was serving the parent. In this test they will - // end up on the same server (since there is only one), but we have to - // wait until the master assigns them. - try { - HScannerInterface s = - t.obtainScanner(new Text[] {new Text(COLFAMILY_NAME3)}, - HConstants.EMPTY_START_ROW); - try { - HStoreKey key = new HStoreKey(); - TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>(); - s.next(key, results); - break; - - } finally { - s.close(); - } - } catch (NotServingRegionException x) { - Thread.sleep(5000); - } - } - // Now, force a compaction. This will rewrite references and make it - // so the parent region becomes deletable. - LOG.info("Starting compaction"); - for (MiniHBaseCluster.RegionServerThread thread: cluster.regionThreads) { - SortedMap<Text, HRegion> regions = - thread.getRegionServer().onlineRegions; - // Retry if ConcurrentModification... alternative of sync'ing is not - // worth it for sake of unit test. 
- for (int i = 0; i < 10; i++) { - try { - for (HRegion online: regions.values()) { - if (online.getTableDesc().getName().toString().equals(getName())) { - online.compactStores(); - } - } - break; - } catch (ConcurrentModificationException e) { - LOG.warn("Retrying because ..." + e.toString() + " -- one or " + - "two should be fine"); - continue; - } - } + LOG.info("Split happened. Parent is " + parent.getRegionName() + + " and daughters are " + splitA.getRegionName() + ", " + + splitB.getRegionName()); + // Recalibrate will cause us to wait on new regions' deployment + recalibrate(t, new Text(COLFAMILY_NAME3), retries); + // Compact a region at a time so we can test case where one region has + // no references but the other still has some + compact(cluster, splitA); + // Wait till the parent only has reference to remaining split, one that + // still has references. + while (getSplitParentInfo(meta, parent).size() == 3) { + Thread.sleep(5000); } - + LOG.info("Parent split returned " + + getSplitParentInfo(meta, parent).keySet().toString()); + // Call second split. + compact(cluster, splitB); // Now wait until parent disappears. LOG.info("Waiting on parent " + parent.getRegionName() + " to disappear"); - for (int i = 0; i < retries && getSplitParent(meta) != null; i++) { + for (int i = 0; i < retries && + getSplitParentInfo(meta, parent) != null; i++) { Thread.sleep(5000); } - assertTrue(getSplitParent(meta) == null); + assertTrue(getSplitParentInfo(meta, parent) == null); // Assert cleaned up. for (int i = 0; i < retries && fs.exists(parentDir); i++) { Thread.sleep(5000); @@ -258,6 +236,70 @@ } } + /* + * Compact the passed in region <code>r</code>. 
+ * @param cluster + * @param r + * @throws IOException + */ + private void compact(final MiniHBaseCluster cluster, final HRegionInfo r) + throws IOException { + LOG.info("Starting compaction"); + for (MiniHBaseCluster.RegionServerThread thread: cluster.regionThreads) { + SortedMap<Text, HRegion> regions = + thread.getRegionServer().onlineRegions; + // Retry if ConcurrentModification... alternative of sync'ing is not + // worth it for sake of unit test. + for (int i = 0; i < 10; i++) { + try { + for (HRegion online: regions.values()) { + if (online.getRegionName().toString(). + equals(r.getRegionName().toString())) { + online.compactStores(); + } + } + break; + } catch (ConcurrentModificationException e) { + LOG.warn("Retrying because ..." + e.toString() + " -- one or " + + "two should be fine"); + continue; + } + } + } + } + + /* + * Recalibrate passed in HTable. Run after change in region geography. + * Open a scanner on the table. This will force HTable to recalibrate + * and in doing so, will force us to wait until the new child regions + * come on-line (since they are no longer automatically served by the + * HRegionServer that was serving the parent. In this test they will + * end up on the same server (since there is only one), but we have to + * wait until the master assigns them. 
+ * @param t + * @param retries + */ + private void recalibrate(final HTable t, final Text column, + final int retries) + throws IOException, InterruptedException { + for (int i = 0; i < retries; i++) { + try { + HScannerInterface s = + t.obtainScanner(new Text[] {column}, HConstants.EMPTY_START_ROW); + try { + HStoreKey key = new HStoreKey(); + TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>(); + s.next(key, results); + break; + } finally { + s.close(); + } + } catch (NotServingRegionException x) { + Thread.sleep(5000); + } + } + } + private void assertGet(final HRegion r, final String family, final Text k) throws IOException { // Now I have k, get values out and assert they are as expected. @@ -270,30 +312,29 @@ } } - private HRegionInfo getSplitParent(final HTable t) + /* + * @return Return row info for passed in region or null if not found in scan. + */ + private Map<Text, byte []> getSplitParentInfo(final HTable t, + final HRegionInfo parent) throws IOException { - HRegionInfo result = null; - HScannerInterface s = t.obtainScanner(HConstants.COL_REGIONINFO_ARRAY, + HScannerInterface s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, HConstants.EMPTY_START_ROW, System.currentTimeMillis(), null); try { HStoreKey curKey = new HStoreKey(); TreeMap<Text, byte []> curVals = new TreeMap<Text, byte []>(); while(s.next(curKey, curVals)) { - byte[] bytes = curVals.get(HConstants.COL_REGIONINFO); - if (bytes == null || bytes.length == 0) { + HRegionInfo hri = Writables. + getHRegionInfoOrNull(curVals.get(HConstants.COL_REGIONINFO)); + if (hri == null) { continue; } - HRegionInfo hri = - (HRegionInfo) Writables.getWritable(bytes, new HRegionInfo()); - - // Assert that if region is a split region, that it is also offline. - // Otherwise, if not a split region, assert that it is online. - if (hri.isSplit() && hri.isOffline()) { - result = hri; - break; + if (hri.getRegionName().toString(). 
+ equals(parent.getRegionName().toString())) { + return curVals; } } - return result; + return null; } finally { s.close(); }