Author: stack
Date: Fri Dec 21 13:58:25 2007
New Revision: 606332

URL: http://svn.apache.org/viewvc?rev=606332&view=rev
Log:
HADOOP-2479 Save on number of Text object creations
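In outline, the change works like this: the old HStoreKey.extractFamily() and extractQualifier() copied the family or qualifier bytes into a brand-new Text on every call, so hot paths such as column matching in HAbstractScanner and the memcache flush in HStore churned out short-lived Text objects. Both methods now return a TextSequence (org.apache.hadoop.hbase.io.TextSequence, whose implementation is not part of this diff): a view over the passed column's backing bytes that callers can compare against directly, calling toText() only when they need to keep the result after the underlying Text is reused. The sketch below is only an illustration of that view-plus-copy-on-demand idea; the names FamilyView and sameAs are hypothetical and are not the committed TextSequence API.

import org.apache.hadoop.io.Text;

// Illustrative sketch only -- not the org.apache.hadoop.hbase.io.TextSequence
// used by this patch.  A view wraps the existing bytes instead of copying them;
// a copy is made only when a caller asks for one via toText().
class FamilyView {
  private final Text delegate; // the original column Text, e.g. "info:server"
  private final int start;     // first byte of the viewed range
  private final int end;       // one past the last byte of the viewed range

  FamilyView(final Text delegate, final int start, final int end) {
    this.delegate = delegate;
    this.start = start;
    this.end = end;
  }

  // Compare the viewed bytes against a Text without allocating anything.
  boolean sameAs(final Text other) {
    final int length = this.end - this.start;
    if (length != other.getLength()) {
      return false;
    }
    final byte [] viewed = this.delegate.getBytes();
    final byte [] candidate = other.getBytes();
    for (int i = 0; i < length; i++) {
      if (viewed[this.start + i] != candidate[i]) {
        return false;
      }
    }
    return true;
  }

  // Copy the viewed bytes into a standalone Text, for callers that keep the
  // result beyond the reuse of the wrapped Text.
  Text toText() {
    final Text copy = new Text();
    copy.set(this.delegate.getBytes(), this.start, this.end - this.start);
    return copy;
  }
}

The call sites below follow that split: comparisons (ColumnMatcher.matches(), the memcache flush in HStore) work against the returned view directly, while code that stores the family name (HRegion, the HStore constructor, the FAMILY_ONLY matcher) calls toText() to take a private copy.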
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Fri Dec 21 13:58:25 2007 @@ -19,6 +19,7 @@ (Bryan Duxbury via Stack) OPTIMIZATIONS + HADOOP-2479 Save on number of Text object creations BUG FIXES HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java Fri Dec 21 13:58:25 2007 @@ -64,7 +64,7 @@ private static class ColumnMatcher { private boolean wildCardmatch; private MATCH_TYPE matchType; - private String family; + private Text family; private Pattern columnMatcher; private Text col; @@ -73,7 +73,7 @@ try { if(qualifier == null || qualifier.getLength() == 0) { this.matchType = MATCH_TYPE.FAMILY_ONLY; - this.family = HStoreKey.extractFamily(col).toString(); + this.family = HStoreKey.extractFamily(col).toText(); this.wildCardmatch = true; } else if(isRegexPattern.matcher(qualifier.toString()).matches()) { this.matchType = MATCH_TYPE.REGEX; @@ -93,13 +93,10 @@ boolean matches(Text c) throws IOException { if(this.matchType == MATCH_TYPE.SIMPLE) { return c.equals(this.col); - } else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) { - return HStoreKey.extractFamily(c).toString().equals(this.family); - + return HStoreKey.extractFamily(c).equals(this.family); } else if(this.matchType == MATCH_TYPE.REGEX) { return this.columnMatcher.matcher(c.toString()).matches(); - } else { throw new IOException("Invalid match type: " + this.matchType); } @@ -130,7 +127,7 @@ this.multipleMatchers = false; this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>(); for(int i = 0; i < targetCols.length; i++) { - Text family = HStoreKey.extractFamily(targetCols[i]); + Text family = HStoreKey.extractFamily(targetCols[i]).toText(); Vector<ColumnMatcher> matchers = okCols.get(family); if(matchers == null) { matchers = new Vector<ColumnMatcher>(); @@ -160,8 +157,8 @@ */ boolean columnMatch(int i) throws IOException { Text column = keys[i].getColumn(); - Text family = HStoreKey.extractFamily(column); - Vector<ColumnMatcher> matchers = okCols.get(family); + Vector<ColumnMatcher> matchers = + okCols.get(HStoreKey.extractFamily(column)); if(matchers == null) { return false; } Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Fri Dec 21 13:58:25 2007 @@ -283,7 +283,7 @@ long maxSeqId = -1; for(Map.Entry<Text, HColumnDescriptor> e : this.regionInfo.getTableDesc().families().entrySet()) { - Text colFamily = HStoreKey.extractFamily(e.getKey()); + Text colFamily = HStoreKey.extractFamily(e.getKey()).toText(); HStore store = new HStore(rootDir, this.regionInfo.getRegionName(), this.encodedRegionName, e.getValue(), fs, oldLogFile, conf); Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java Fri Dec 21 13:58:25 2007 @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.filter.RowFilterInterface; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.io.TextSequence; import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; @@ -565,7 +566,7 @@ this.regionName = regionName; this.encodedRegionName = encodedName; this.family = family; - this.familyName = HStoreKey.extractFamily(this.family.getName()); + this.familyName = HStoreKey.extractFamily(this.family.getName()).toText(); this.compression = SequenceFile.CompressionType.NONE; this.storeName = this.encodedRegionName + "/" + this.familyName.toString(); @@ -939,8 +940,8 @@ try { for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) { HStoreKey curkey = es.getKey(); - if (this.familyName.equals(HStoreKey.extractFamily( - curkey.getColumn()))) { + TextSequence f = HStoreKey.extractFamily(curkey.getColumn()); + if (f.equals(this.familyName)) { out.append(curkey, new ImmutableBytesWritable(es.getValue())); } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java Fri Dec 21 13:58:25 2007 @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hbase; +import org.apache.hadoop.hbase.io.TextSequence; import org.apache.hadoop.io.*; import java.io.*; @@ -27,94 +28,14 @@ * A Key for a stored row */ public class HStoreKey implements WritableComparable { - public static final char COLUMN_FAMILY_DELIMITER = ':'; - - // TODO: Move these utility methods elsewhere (To a Column class?). 
- /** - * Extracts the column family name from a column - * For example, returns 'info' if the specified column was 'info:server' - * @param col name of column - * @return column family name - * @throws InvalidColumnNameException - */ - public static Text extractFamily(final Text col) - throws InvalidColumnNameException { - return extractFamily(col, false); - } - /** - * Extracts the column family name from a column - * For example, returns 'info' if the specified column was 'info:server' - * @param col name of column - * @param withColon if returned family name should include the ':' suffix. - * @return column family name - * @throws InvalidColumnNameException + * Colon character in UTF-8 */ - public static Text extractFamily(final Text col, final boolean withColon) - throws InvalidColumnNameException { - int offset = getColonOffset(col); - // Include ':' in copy? - offset += (withColon)? 1: 0; - if (offset == col.getLength()) { - return col; - } - byte [] buffer = new byte[offset]; - System.arraycopy(col.getBytes(), 0, buffer, 0, offset); - return new Text(buffer); - } - - /** - * Extracts the column qualifier, the portion that follows the colon (':') - * family/qualifier separator. - * For example, returns 'server' if the specified column was 'info:server' - * @param col name of column - * @return column qualifier or null if there is no qualifier. - * @throws InvalidColumnNameException - */ - public static Text extractQualifier(final Text col) - throws InvalidColumnNameException { - int offset = getColonOffset(col); - if (offset + 1 == col.getLength()) { - return null; - } - int bufferLength = col.getLength() - (offset + 1); - byte [] buffer = new byte[bufferLength]; - System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength); - return new Text(buffer); - } + public static final char COLUMN_FAMILY_DELIMITER = ':'; - private static int getColonOffset(final Text col) - throws InvalidColumnNameException { - int offset = -1; - for (int i = 0; i < col.getLength(); i++) { - if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) { - offset = i; - break; - } - } - if(offset < 0) { - throw new InvalidColumnNameException(col + " is missing the colon " + - "family/qualifier separator"); - } - return offset; - } - - /** - * Returns row and column bytes out of an HStoreKey. - * @param hsk Store key. - * @return byte array encoding of HStoreKey - * @throws UnsupportedEncodingException - */ - public static byte[] getBytes(final HStoreKey hsk) - throws UnsupportedEncodingException { - StringBuilder s = new StringBuilder(hsk.getRow().toString()); - s.append(hsk.getColumn().toString()); - return s.toString().getBytes(HConstants.UTF8_ENCODING); - } - - Text row; - Text column; - long timestamp; + private Text row; + private Text column; + private long timestamp; /** Default constructor used in conjunction with Writable interface */ @@ -163,6 +84,7 @@ * @param timestamp timestamp value */ public HStoreKey(Text row, Text column, long timestamp) { + // Make copies by doing 'new Text(arg)'. this.row = new Text(row); this.column = new Text(column); this.timestamp = timestamp; @@ -338,5 +260,91 @@ row.readFields(in); column.readFields(in); timestamp = in.readLong(); + } + + // Statics + // TODO: Move these utility methods elsewhere (To a Column class?). + + /** + * Extracts the column family name from a column + * For example, returns 'info' if the specified column was 'info:server' + * @param col name of column + * @return column family as a TextSequence based on the passed + * <code>col</code>.
If <code>col</code> is reused, make a new Text of + * the result by calling {@link TextSequence#toText()}. + * @throws InvalidColumnNameException + */ + public static TextSequence extractFamily(final Text col) + throws InvalidColumnNameException { + return extractFamily(col, false); + } + + /** + * Extracts the column family name from a column + * For example, returns 'info' if the specified column was 'info:server' + * @param col name of column + * @return column family as a TextSequence based on the passed + * <code>col</code>. If <code>col</code> is reused, make a new Text of + * the result by calling {@link TextSequence#toText()}. + * @throws InvalidColumnNameException + */ + public static TextSequence extractFamily(final Text col, + final boolean withColon) + throws InvalidColumnNameException { + int offset = getColonOffset(col); + // Include ':' in copy? + offset += (withColon)? 1: 0; + if (offset == col.getLength()) { + return new TextSequence(col); + } + return new TextSequence(col, 0, offset); + } + + /** + * Extracts the column qualifier, the portion that follows the colon (':') + * family/qualifier separator. + * For example, returns 'server' if the specified column was 'info:server' + * @param col name of column + * @return column qualifier as a TextSequence based on the passed + * <code>col</code>. If <code>col</code> is reused, make a new Text of + * the result by calling {@link TextSequence#toText()}. + * @throws InvalidColumnNameException + */ + public static TextSequence extractQualifier(final Text col) + throws InvalidColumnNameException { + int offset = getColonOffset(col); + if (offset + 1 == col.getLength()) { + return null; + } + return new TextSequence(col, offset + 1); + } + + private static int getColonOffset(final Text col) + throws InvalidColumnNameException { + int offset = -1; + for (int i = 0; i < col.getLength(); i++) { + if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) { + offset = i; + break; + } + } + if(offset < 0) { + throw new InvalidColumnNameException(col + " is missing the colon " + + "family/qualifier separator"); + } + return offset; + } + + /** + * Returns row and column bytes out of an HStoreKey. + * @param hsk Store key. + * @return byte array encoding of HStoreKey + * @throws UnsupportedEncodingException + */ + public static byte[] getBytes(final HStoreKey hsk) + throws UnsupportedEncodingException { + StringBuilder s = new StringBuilder(hsk.getRow().toString()); + s.append(hsk.getColumn().toString()); + return s.toString().getBytes(HConstants.UTF8_ENCODING); } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java Fri Dec 21 13:58:25 2007 @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.PrintStream; -import java.io.UnsupportedEncodingException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; @@ -472,25 +471,28 @@ } } + /* + * Format passed integer. + * This method takes some time and is done inline while uploading data.
For + * example, doing the mapfile test, generation of the key and value + * consumes about 30% of CPU time. + * @param i + * @return Integer as String zero padded. + */ static Text format(final int i) { return new Text(String.format("%010d", Integer.valueOf(i))); } /* + * This method takes some time and is done inline while uploading data. For + * example, doing the mapfile test, generation of the key and value + * consumes about 30% of CPU time. * @return Generated random value to insert into a table cell. */ static byte[] generateValue(final Random r) { - StringBuilder val = new StringBuilder(); - while(val.length() < ROW_LENGTH) { - val.append(Long.toString(r.nextLong())); - } - byte[] value = null; - try { - value = val.toString().getBytes(HConstants.UTF8_ENCODING); - } catch (UnsupportedEncodingException e) { - assert(false); - } - return value; + byte [] b = new byte [ROW_LENGTH]; + r.nextBytes(b); + return b; } static Text getRandomRow(final Random random, final int totalRows) { @@ -556,7 +558,7 @@ Random random = new Random(); Configuration c = new Configuration(); FileSystem fs = FileSystem.get(c); - Path mf = new Path("performanceevaluation.mapfile"); + Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile")); if (fs.exists(mf)) { fs.delete(mf); } @@ -571,7 +573,9 @@ } writer.close(); LOG.info("Writing " + ROW_COUNT + " records took " + - (System.currentTimeMillis() - startTime) + "ms"); + (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " + + "and values is done inline and has been seen to consume " + + "significant time: e.g. ~30% of cpu time)"); // Do random reads. LOG.info("Reading " + ROW_COUNT + " random rows"); MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c); @@ -585,7 +589,9 @@ } reader.close(); LOG.info("Reading " + ROW_COUNT + " random records took " + - (System.currentTimeMillis() - startTime) + "ms"); + (System.currentTimeMillis() - startTime) + "ms (Note: generation of " + + "random key is done inline and takes a significant amount of cpu " + + "time: e.g. 10-15%)"); // Do random reads. LOG.info("Reading " + ROW_COUNT + " rows sequentially"); reader = new MapFile.Reader(fs, mf.toString(), c); @@ -599,7 +605,7 @@ LOG.info("Reading " + ROW_COUNT + " records serially took " + (System.currentTimeMillis() - startTime) + "ms"); } - + private void runTest(final String cmd) throws IOException { if (cmd.equals(RANDOM_READ_MEM)) { // For this one test, so all fits in memory, make R smaller (See