Author: stack Date: Thu Dec 27 14:26:27 2007 New Revision: 607131 URL: http://svn.apache.org/viewvc?rev=607131&view=rev Log: HADOOP-2485 Make mapfile index interval configurable
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreFile.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=607131&r1=607130&r2=607131&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Thu Dec 27 14:26:27 2007 @@ -20,6 +20,8 @@ OPTIMIZATIONS HADOOP-2479 Save on number of Text object creations + HADOOP-2485 Make mapfile index interval configurable (Set default to 32 + instead of 128) BUG FIXES HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test Modified: lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml?rev=607131&r1=607130&r2=607131&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml (original) +++ lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml Thu Dec 27 14:26:27 2007 @@ -201,6 +201,16 @@ <description>How often a region server runs the split/compaction check. </description> </property> + <property> + <name>hbase.io.index.interval</name> + <value>32</value> + <description>The interval at which we record offsets in hbase + store files/mapfiles. Default for stock mapfiles is 128. Index + files are read into memory. If there are many of them, could prove + a burden. If so play with the hadoop io.map.index.skip property and + skip every nth index member when reading back the index into memory. 
+ </description> + </property> <!-- HbaseShell Configurations --> <property> Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java?rev=607131&r1=607130&r2=607131&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java Thu Dec 27 14:26:27 2007 @@ -937,11 +937,13 @@ // // Related, looks like 'merging compactions' in BigTable paper interlaces // a memcache flush. We don't. + int entries = 0; try { for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) { HStoreKey curkey = es.getKey(); TextSequence f = HStoreKey.extractFamily(curkey.getColumn()); if (f.equals(this.familyName)) { + entries++; out.append(curkey, new ImmutableBytesWritable(es.getValue())); } } @@ -967,10 +969,10 @@ flushedFile.getReader(this.fs, this.bloomFilter)); this.storefiles.put(flushid, flushedFile); if(LOG.isDebugEnabled()) { - LOG.debug("Added " + name + - " with sequence id " + logCacheFlushId + " and size " + - StringUtils.humanReadableInt(flushedFile.length()) + " for " + - this.regionName + "/" + this.familyName); + LOG.debug("Added " + name + " with " + entries + + " entries, sequence id " + logCacheFlushId + ", and size " + + StringUtils.humanReadableInt(flushedFile.length()) + " for " + + this.regionName + "/" + this.familyName); } } finally { this.lock.writeLock().unlock(); Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreFile.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreFile.java?rev=607131&r1=607130&r2=607131&view=diff ============================================================================== --- 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreFile.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreFile.java Thu Dec 27 14:26:27 2007 @@ -655,136 +655,37 @@ } /** - * A facade for a {@link MapFile.Reader} that serves up either the top or - * bottom half of a MapFile (where 'bottom' is the first half of the file - * containing the keys that sort lowest and 'top' is the second half of the - * file with keys that sort greater than those of the bottom half). - * Subclasses BloomFilterMapFile.Reader in case - * - * <p>This file is not splitable. Calls to {@link #midKey()} return null. + * Hbase customizations of MapFile. */ - static class HalfMapFileReader extends BloomFilterMapFile.Reader { - private final boolean top; - private final WritableComparable midkey; - private boolean topFirstNextCall = true; - - HalfMapFileReader(final FileSystem fs, final String dirName, - final Configuration conf, final Range r, - final WritableComparable midKey) - throws IOException { - this(fs, dirName, conf, r, midKey, null); - } - - HalfMapFileReader(final FileSystem fs, final String dirName, - final Configuration conf, final Range r, - final WritableComparable midKey, final Filter filter) - throws IOException { - super(fs, dirName, conf, filter); - this.top = isTopFileRegion(r); - this.midkey = midKey; - } - - @SuppressWarnings("unchecked") - private void checkKey(final WritableComparable key) - throws IOException { - if (this.top) { - if (key.compareTo(this.midkey) < 0) { - throw new IOException("Illegal Access: Key is less than midKey of " + - "backing mapfile"); - } - } else if (key.compareTo(this.midkey) >= 0) { - throw new IOException("Illegal Access: Key is greater than or equal " + - "to midKey of backing mapfile"); - } - } - - /** {@inheritDoc} */ - @SuppressWarnings({ "unused"}) - @Override - public synchronized void finalKey(WritableComparable key) - throws IOException 
{ - throw new UnsupportedOperationException("Unsupported"); - } - - /** {@inheritDoc} */ - @Override - public synchronized Writable get(WritableComparable key, Writable val) - throws IOException { - checkKey(key); - return super.get(key, val); - } - - /** {@inheritDoc} */ - @SuppressWarnings("unchecked") - @Override - public synchronized WritableComparable getClosest(WritableComparable key, - Writable val) - throws IOException { - if (this.top) { - if (key.compareTo(this.midkey) < 0) { - return this.midkey; - } - } else if (key.compareTo(this.midkey) >= 0) { - // Contract says return null if EOF. - return null; - } - return super.getClosest(key, val); - } - - /** {@inheritDoc} */ - @SuppressWarnings("unused") - @Override - public synchronized WritableComparable midKey() throws IOException { - // Returns null to indicate file is not splitable. - return null; - } + static class HbaseMapFile extends MapFile { - /** {@inheritDoc} */ - @SuppressWarnings("unchecked") - @Override - public synchronized boolean next(WritableComparable key, Writable val) - throws IOException { - if (this.top && this.topFirstNextCall) { - this.topFirstNextCall = false; - return doFirstNextProcessing(key, val); - } - boolean result = super.next(key, val); - if (!top && key.compareTo(this.midkey) >= 0) { - result = false; + static class HbaseReader extends MapFile.Reader { + public HbaseReader(FileSystem fs, String dirName, Configuration conf) + throws IOException { + super(fs, dirName, conf); + // Force reading of the mapfile index by calling midKey. + // Reading the index will bring the index into memory over + // here on the client and then close the index file freeing + // up socket connection and resources in the datanode. + // Usually, the first access on a MapFile.Reader will load the + // index force the issue in HStoreFile MapFiles because an + // access may not happen for some time; meantime we're + // using up datanode resources. See HADOOP-2341. 
+ midKey(); } - return result; } - private boolean doFirstNextProcessing(WritableComparable key, Writable val) - throws IOException { - // Seek to midkey. Midkey may not exist in this file. That should be - // fine. Then we'll either be positioned at end or start of file. - WritableComparable nearest = getClosest(this.midkey, val); - // Now copy the mid key into the passed key. - if (nearest != null) { - Writables.copyWritable(nearest, key); - return true; - } - return false; - } - - /** {@inheritDoc} */ - @Override - public synchronized void reset() throws IOException { - if (top) { - this.topFirstNextCall = true; - seek(this.midkey); - return; + static class HbaseWriter extends MapFile.Writer { + public HbaseWriter(Configuration conf, FileSystem fs, String dirName, + Class<Writable> keyClass, Class<Writable> valClass, + SequenceFile.CompressionType compression) + throws IOException { + super(conf, fs, dirName, keyClass, valClass, compression); + // Default for mapfiles is 128. Makes random reads faster if we + // have more keys indexed and we're not 'next'-ing around in the + // mapfile. + setIndexInterval(conf.getInt("hbase.index.interval", 128)); } - super.reset(); - } - - /** {@inheritDoc} */ - @Override - public synchronized boolean seek(WritableComparable key) - throws IOException { - checkKey(key); - return super.seek(key); } } @@ -793,40 +694,17 @@ * tested first against bloom filter. Keys are HStoreKey. If passed bloom * filter is null, just passes invocation to parent. 
*/ - static class BloomFilterMapFile extends MapFile { - protected BloomFilterMapFile() { - super(); - } - - static class Reader extends MapFile.Reader { + static class BloomFilterMapFile extends HbaseMapFile { + static class Reader extends HbaseReader { private final Filter bloomFilter; - /** - * Constructor - * - * @param fs - * @param dirName - * @param conf - * @param filter - * @throws IOException - */ public Reader(FileSystem fs, String dirName, Configuration conf, final Filter filter) throws IOException { super(fs, dirName, conf); this.bloomFilter = filter; - // Force reading of the mapfile index by calling midKey. - // Reading the index will bring the index into memory over - // here on the client and then close the index file freeing - // up socket connection and resources in the datanode. - // Usually, the first access on a MapFile.Reader will load the - // index force the issue in HStoreFile MapFiles because an - // access may not happen for some time; meantime we're - // using up datanode resources. See HADOOP-2341. 
- midKey(); } - /** {@inheritDoc} */ @Override public Writable get(WritableComparable key, Writable val) throws IOException { @@ -845,7 +723,6 @@ return null; } - /** {@inheritDoc} */ @Override public WritableComparable getClosest(WritableComparable key, Writable val) @@ -867,21 +744,9 @@ } } - static class Writer extends MapFile.Writer { + static class Writer extends HbaseWriter { private final Filter bloomFilter; - /** - * Constructor - * - * @param conf - * @param fs - * @param dirName - * @param keyClass - * @param valClass - * @param compression - * @param filter - * @throws IOException - */ @SuppressWarnings("unchecked") public Writer(Configuration conf, FileSystem fs, String dirName, Class keyClass, Class valClass, @@ -890,8 +755,7 @@ super(conf, fs, dirName, keyClass, valClass, compression); this.bloomFilter = filter; } - - /** {@inheritDoc} */ + @Override public void append(WritableComparable key, Writable val) throws IOException { @@ -964,6 +828,140 @@ return new BloomFilterMapFile.Writer(conf, fs, getMapFilePath().toString(), HStoreKey.class, ImmutableBytesWritable.class, compression, bloomFilter); + } + + /** + * A facade for a {@link MapFile.Reader} that serves up either the top or + * bottom half of a MapFile (where 'bottom' is the first half of the file + * containing the keys that sort lowest and 'top' is the second half of the + * file with keys that sort greater than those of the bottom half). + * Subclasses BloomFilterMapFile.Reader in case + * + * <p>This file is not splitable. Calls to {@link #midKey()} return null. 
+ */ + static class HalfMapFileReader extends BloomFilterMapFile.Reader { + private final boolean top; + private final WritableComparable midkey; + private boolean topFirstNextCall = true; + + HalfMapFileReader(final FileSystem fs, final String dirName, + final Configuration conf, final Range r, + final WritableComparable midKey) + throws IOException { + this(fs, dirName, conf, r, midKey, null); + } + + HalfMapFileReader(final FileSystem fs, final String dirName, + final Configuration conf, final Range r, + final WritableComparable midKey, final Filter filter) + throws IOException { + super(fs, dirName, conf, filter); + this.top = isTopFileRegion(r); + this.midkey = midKey; + } + + @SuppressWarnings("unchecked") + private void checkKey(final WritableComparable key) + throws IOException { + if (this.top) { + if (key.compareTo(this.midkey) < 0) { + throw new IOException("Illegal Access: Key is less than midKey of " + + "backing mapfile"); + } + } else if (key.compareTo(this.midkey) >= 0) { + throw new IOException("Illegal Access: Key is greater than or equal " + + "to midKey of backing mapfile"); + } + } + + /** {@inheritDoc} */ + @SuppressWarnings({ "unused"}) + @Override + public synchronized void finalKey(WritableComparable key) + throws IOException { + throw new UnsupportedOperationException("Unsupported"); + } + + /** {@inheritDoc} */ + @Override + public synchronized Writable get(WritableComparable key, Writable val) + throws IOException { + checkKey(key); + return super.get(key, val); + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + @Override + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val) + throws IOException { + if (this.top) { + if (key.compareTo(this.midkey) < 0) { + return this.midkey; + } + } else if (key.compareTo(this.midkey) >= 0) { + // Contract says return null if EOF. 
+ return null; + } + return super.getClosest(key, val); + } + + /** {@inheritDoc} */ + @SuppressWarnings("unused") + @Override + public synchronized WritableComparable midKey() throws IOException { + // Returns null to indicate file is not splitable. + return null; + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + @Override + public synchronized boolean next(WritableComparable key, Writable val) + throws IOException { + if (this.top && this.topFirstNextCall) { + this.topFirstNextCall = false; + return doFirstNextProcessing(key, val); + } + boolean result = super.next(key, val); + if (!top && key.compareTo(this.midkey) >= 0) { + result = false; + } + return result; + } + + private boolean doFirstNextProcessing(WritableComparable key, Writable val) + throws IOException { + // Seek to midkey. Midkey may not exist in this file. That should be + // fine. Then we'll either be positioned at end or start of file. + WritableComparable nearest = getClosest(this.midkey, val); + // Now copy the mid key into the passed key. + if (nearest != null) { + Writables.copyWritable(nearest, key); + return true; + } + return false; + } + + /** {@inheritDoc} */ + @Override + public synchronized void reset() throws IOException { + if (top) { + this.topFirstNextCall = true; + seek(this.midkey); + return; + } + super.reset(); + } + + /** {@inheritDoc} */ + @Override + public synchronized boolean seek(WritableComparable key) + throws IOException { + checkKey(key); + return super.seek(key); + } } /**