Author: jimk Date: Wed Aug 22 09:59:43 2007 New Revision: 568700 URL: http://svn.apache.org/viewvc?rev=568700&view=rev Log: HADOOP-1527 Region server won't start because logdir exists
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerAbort.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=568700&r1=568699&r2=568700&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Wed Aug 22 09:59:43 2007 @@ -10,6 +10,7 @@ OPTIMIZATIONS BUG FIXES + HADOOP-1527 Region server won't start because logdir exists HADOOP-1723 If master asks region server to shut down, by-pass return of shutdown message HADOOP-1729 Recent renaming or META tables breaks hbase shell Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java?rev=568700&r1=568699&r2=568700&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java Wed Aug 22 09:59:43 2007 @@ -83,7 +83,7 @@ long filenum = 0; AtomicInteger numEntries = new AtomicInteger(0); - Integer rollLock = new Integer(0); + Integer rollLock = Integer.valueOf(0); /** * Split up a bunch of log files, that are no longer being written to, @@ -439,35 +439,61 @@ notifyAll(); } + private static void usage() { + System.err.println("Usage: java org.apache.hbase.HLog" + + " {--dump <logfile>... | --split <logdir>...}"); + } + /** - * Pass a log file and it will dump out a text version on - * <code>stdout</code>. + * Pass one or more log file names and it will either dump out a text version + * on <code>stdout</code> or split the specified log files. * @param args * @throws IOException */ public static void main(String[] args) throws IOException { - if (args.length < 1) { - System.err.println("Usage: java org.apache.hbase.HLog <logfile>"); + if (args.length < 2) { + usage(); System.exit(-1); } + boolean dump = true; + if (args[0].compareTo("--dump") != 0) { + if (args[0].compareTo("--split") == 0) { + dump = false; + + } else { + usage(); + System.exit(-1); + } + } Configuration conf = new HBaseConfiguration(); FileSystem fs = FileSystem.get(conf); - Path logfile = new Path(args[0]); - if (!fs.exists(logfile)) { - throw new FileNotFoundException(args[0] + " does not exist"); - } - if (!fs.isFile(logfile)) { - throw new IOException(args[0] + " is not a file"); - } - Reader log = new SequenceFile.Reader(fs, logfile, conf); - try { - HLogKey key = new HLogKey(); - HLogEdit val = new HLogEdit(); - while(log.next(key, val)) { - System.out.println(key.toString() + " " + val.toString()); + Path baseDir = new Path(conf.get(HBASE_DIR, DEFAULT_HBASE_DIR)); + + for (int i = 1; i < args.length; i++) { + Path logPath = new Path(args[i]); + if (!fs.exists(logPath)) { + throw new FileNotFoundException(args[i] + " does not exist"); + } + if (dump) { + if (!fs.isFile(logPath)) { + throw new IOException(args[i] + " is not a file"); + } + Reader log = new SequenceFile.Reader(fs, logPath, conf); + try { + HLogKey key = new HLogKey(); + HLogEdit val = new HLogEdit(); + while(log.next(key, val)) { + System.out.println(key.toString() + " " + val.toString()); + } + } finally { + log.close(); + } + } else { + if (!fs.getFileStatus(logPath).isDir()) { + throw new IOException(args[i] + " is not a directory"); + } + splitLog(baseDir, logPath, fs, conf); } - } finally { - log.close(); } } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=568700&r1=568699&r2=568700&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Wed Aug 22 09:59:43 2007 @@ -38,6 +38,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -101,6 +103,8 @@ long metaRescanInterval; final AtomicReference<HServerAddress> rootRegionLocation; + + Lock splitLogLock = new ReentrantLock(); /** * Base HRegion scanner class. Holds utilty common to <code>ROOT</code> and @@ -424,7 +428,32 @@ pendingRegions.contains(info.regionName)) && (storedInfo == null || storedInfo.getStartCode() != startCode)) { - // The current assignment is no good; load the region. + // The current assignment is no good + + // Recover the region server's log if there is one. + + if (serverName.length() != 0) { + StringBuilder dirName = new StringBuilder("log_"); + dirName.append(serverName.replace(":", "_")); + Path logDir = new Path(dir, dirName.toString()); + try { + if (fs.exists(logDir)) { + splitLogLock.lock(); + try { + HLog.splitLog(dir, logDir, fs, conf); + + } finally { + splitLogLock.unlock(); + } + } + + } catch (IOException e) { + LOG.warn("unable to split region server log because: ", e); + } + } + + // Now get the region assigned + unassignedRegions.put(info.regionName, info); assignAttempts.put(info.regionName, Long.valueOf(0L)); } @@ -513,7 +542,7 @@ private RootScanner rootScanner; private Thread rootScannerThread; - Integer rootScannerLock = new Integer(0); + Integer rootScannerLock = Integer.valueOf(0); @SuppressWarnings("unchecked") static class MetaRegion implements Comparable { @@ -702,7 +731,7 @@ MetaScanner metaScanner; private Thread metaScannerThread; - Integer metaScannerLock = new Integer(0); + Integer metaScannerLock = Integer.valueOf(0); /** * The 'unassignedRegions' table maps from a region name to a HRegionInfo @@ -1832,9 +1861,23 @@ if (!logSplit) { // Process the old log file - HLog.splitLog(dir, new Path(dir, "log" + "_" + - deadServer.getBindAddress() + "_" + deadServer.getPort()), fs, conf); + StringBuilder dirName = new StringBuilder("log_"); + dirName.append(deadServer.getBindAddress()); + dirName.append("_"); + dirName.append(deadServer.getPort()); + Path logdir = new Path(dir, dirName.toString()); + if (fs.exists(logdir)) { + if (!splitLogLock.tryLock()) { + return false; + } + try { + HLog.splitLog(dir, logdir, fs, conf); + + } finally { + splitLogLock.unlock(); + } + } logSplit = true; } @@ -2154,8 +2197,8 @@ // We can't proceed until the root region is online and has been scanned if (LOG.isDebugEnabled()) { LOG.debug("root region: " + - ((rootRegionLocation != null)? - rootRegionLocation.toString(): "null") + + ((rootRegionLocation.get() != null)? + rootRegionLocation.get().toString(): "null") + ", rootScanned: " + rootScanned); } return false; @@ -2946,7 +2989,9 @@ // the PendingServerShutdown operation has a chance to split the log file. try { - msgQueue.put(new PendingServerShutdown(info)); + if (info != null) { + msgQueue.put(new PendingServerShutdown(info)); + } } catch (InterruptedException e) { throw new RuntimeException("Putting into msgQueue was interrupted.", e); } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerAbort.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerAbort.java?rev=568700&r1=568699&r2=568700&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerAbort.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerAbort.java Wed Aug 22 09:59:43 2007 @@ -37,7 +37,7 @@ super(2); conf.setInt("ipc.client.timeout", 5000); // reduce client timeout conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries - conf.setInt("hbase.client.retries.number", 3); // reduce HBase retries + conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(this.getClass().getPackage().getName()).setLevel(Level.DEBUG); }