Author: stack
Date: Fri Dec 21 13:58:25 2007
New Revision: 606332

URL: http://svn.apache.org/viewvc?rev=606332&view=rev
Log:
HADOOP-2479 Save on number of Text object creations
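In outline, the change works like this: the old HStoreKey.extractFamily() and extractQualifier() copied the family or qualifier bytes into a brand-new Text on every call, so hot paths such as column matching in HAbstractScanner and the memcache flush in HStore churned out short-lived Text objects. Both methods now return a TextSequence (org.apache.hadoop.hbase.io.TextSequence, whose implementation is not part of this diff): a view over the passed column's backing bytes that callers can compare against directly, calling toText() only when they need to keep the result after the underlying Text is reused. The sketch below is only an illustration of that view-plus-copy-on-demand idea; the names FamilyView and sameAs are hypothetical and are not the committed TextSequence API.

import org.apache.hadoop.io.Text;

// Illustrative sketch only -- not the org.apache.hadoop.hbase.io.TextSequence
// used by this patch.  A view wraps the existing bytes instead of copying them;
// a copy is made only when a caller asks for one via toText().
class FamilyView {
  private final Text delegate; // the original column Text, e.g. "info:server"
  private final int start;     // first byte of the viewed range
  private final int end;       // one past the last byte of the viewed range

  FamilyView(final Text delegate, final int start, final int end) {
    this.delegate = delegate;
    this.start = start;
    this.end = end;
  }

  // Compare the viewed bytes against a Text without allocating anything.
  boolean sameAs(final Text other) {
    final int length = this.end - this.start;
    if (length != other.getLength()) {
      return false;
    }
    final byte [] viewed = this.delegate.getBytes();
    final byte [] candidate = other.getBytes();
    for (int i = 0; i < length; i++) {
      if (viewed[this.start + i] != candidate[i]) {
        return false;
      }
    }
    return true;
  }

  // Copy the viewed bytes into a standalone Text, for callers that keep the
  // result beyond the reuse of the wrapped Text.
  Text toText() {
    final Text copy = new Text();
    copy.set(this.delegate.getBytes(), this.start, this.end - this.start);
    return copy;
  }
}

The call sites below follow that split: comparisons (ColumnMatcher.matches(), the memcache flush in HStore) work against the returned view directly, while code that stores the family name (HRegion, the HStore constructor, the FAMILY_ONLY matcher) calls toText() to take a private copy.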
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Fri Dec 21 13:58:25 2007 @@ -19,6 +19,7 @@ (Bryan Duxbury via Stack) OPTIMIZATIONS + HADOOP-2479 Save on number of Text object creations BUG FIXES HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java Fri Dec 21 13:58:25 2007 @@ -64,7 +64,7 @@ private static class ColumnMatcher { private boolean wildCardmatch; private MATCH_TYPE matchType; - private String family; + private Text family; private Pattern columnMatcher; private Text col; @@ -73,7 +73,7 @@ try { if(qualifier == null || qualifier.getLength() == 0) { this.matchType = MATCH_TYPE.FAMILY_ONLY; - this.family = HStoreKey.extractFamily(col).toString(); + this.family = HStoreKey.extractFamily(col).toText(); this.wildCardmatch = true; } else if(isRegexPattern.matcher(qualifier.toString()).matches()) { this.matchType = MATCH_TYPE.REGEX; @@ -93,13 +93,10 @@ boolean matches(Text c) throws IOException { if(this.matchType == MATCH_TYPE.SIMPLE) { return c.equals(this.col); - } else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) { - return HStoreKey.extractFamily(c).toString().equals(this.family); - + return HStoreKey.extractFamily(c).equals(this.family); } else if(this.matchType == MATCH_TYPE.REGEX) { return this.columnMatcher.matcher(c.toString()).matches(); - } else { throw new IOException("Invalid match type: " + this.matchType); } @@ -130,7 +127,7 @@ this.multipleMatchers = false; this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>(); for(int i = 0; i < targetCols.length; i++) { - Text family = HStoreKey.extractFamily(targetCols[i]); + Text family = HStoreKey.extractFamily(targetCols[i]).toText(); Vector<ColumnMatcher> matchers = okCols.get(family); if(matchers == null) { matchers = new Vector<ColumnMatcher>(); @@ -160,8 +157,8 @@ */ boolean columnMatch(int i) throws IOException { Text column = keys[i].getColumn(); - Text family = HStoreKey.extractFamily(column); - Vector<ColumnMatcher> matchers = okCols.get(family); + Vector<ColumnMatcher> matchers = + okCols.get(HStoreKey.extractFamily(column)); if(matchers == null) { return false; } Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Fri Dec 21 13:58:25 2007 @@ -283,7 +283,7 @@ long maxSeqId = -1; for(Map.Entry<Text, HColumnDescriptor> e : this.regionInfo.getTableDesc().families().entrySet()) { - Text colFamily = HStoreKey.extractFamily(e.getKey()); + Text colFamily = HStoreKey.extractFamily(e.getKey()).toText(); HStore store = new HStore(rootDir, this.regionInfo.getRegionName(), this.encodedRegionName, e.getValue(), fs, oldLogFile, conf); Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java Fri Dec 21 13:58:25 2007 @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.filter.RowFilterInterface; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.io.TextSequence; import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; @@ -565,7 +566,7 @@ this.regionName = regionName; this.encodedRegionName = encodedName; this.family = family; - this.familyName = HStoreKey.extractFamily(this.family.getName()); + this.familyName = HStoreKey.extractFamily(this.family.getName()).toText(); this.compression = SequenceFile.CompressionType.NONE; this.storeName = this.encodedRegionName + "/" + this.familyName.toString(); @@ -939,8 +940,8 @@ try { for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) { HStoreKey curkey = es.getKey(); - if (this.familyName.equals(HStoreKey.extractFamily( - curkey.getColumn()))) { + TextSequence f = HStoreKey.extractFamily(curkey.getColumn()); + if (f.equals(this.familyName)) { out.append(curkey, new ImmutableBytesWritable(es.getValue())); } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java Fri Dec 21 13:58:25 2007 @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hbase; +import org.apache.hadoop.hbase.io.TextSequence; import org.apache.hadoop.io.*; import java.io.*; @@ -27,94 +28,14 @@ * A Key for a stored row */ public class HStoreKey implements WritableComparable { - public static final char COLUMN_FAMILY_DELIMITER = ':'; - - // TODO: Move these utility methods elsewhere (To a Column class?). 
- /** - * Extracts the column family name from a column - * For example, returns 'info' if the specified column was 'info:server' - * @param col name of column - * @return column family name - * @throws InvalidColumnNameException - */ - public static Text extractFamily(final Text col) - throws InvalidColumnNameException { - return extractFamily(col, false); - } - /** - * Extracts the column family name from a column - * For example, returns 'info' if the specified column was 'info:server' - * @param col name of column - * @param withColon if returned family name should include the ':' suffix. - * @return column family name - * @throws InvalidColumnNameException + * Colon character in UTF-8 */ - public static Text extractFamily(final Text col, final boolean withColon) - throws InvalidColumnNameException { - int offset = getColonOffset(col); - // Include ':' in copy? - offset += (withColon)? 1: 0; - if (offset == col.getLength()) { - return col; - } - byte [] buffer = new byte[offset]; - System.arraycopy(col.getBytes(), 0, buffer, 0, offset); - return new Text(buffer); - } - - /** - * Extracts the column qualifier, the portion that follows the colon (':') - * family/qualifier separator. - * For example, returns 'server' if the specified column was 'info:server' - * @param col name of column - * @return column qualifier or null if there is no qualifier. - * @throws InvalidColumnNameException - */ - public static Text extractQualifier(final Text col) - throws InvalidColumnNameException { - int offset = getColonOffset(col); - if (offset + 1 == col.getLength()) { - return null; - } - int bufferLength = col.getLength() - (offset + 1); - byte [] buffer = new byte[bufferLength]; - System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength); - return new Text(buffer); - } + public static final char COLUMN_FAMILY_DELIMITER = ':'; - private static int getColonOffset(final Text col) - throws InvalidColumnNameException { - int offset = -1; - for (int i = 0; i < col.getLength(); i++) { - if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) { - offset = i; - break; - } - } - if(offset < 0) { - throw new InvalidColumnNameException(col + " is missing the colon " + - "family/qualifier separator"); - } - return offset; - } - - /** - * Returns row and column bytes out of an HStoreKey. - * @param hsk Store key. - * @return byte array encoding of HStoreKey - * @throws UnsupportedEncodingException - */ - public static byte[] getBytes(final HStoreKey hsk) - throws UnsupportedEncodingException { - StringBuilder s = new StringBuilder(hsk.getRow().toString()); - s.append(hsk.getColumn().toString()); - return s.toString().getBytes(HConstants.UTF8_ENCODING); - } - - Text row; - Text column; - long timestamp; + private Text row; + private Text column; + private long timestamp; /** Default constructor used in conjunction with Writable interface */ @@ -163,6 +84,7 @@ * @param timestamp timestamp value */ public HStoreKey(Text row, Text column, long timestamp) { + // Make copies by doing 'new Text(arg)'. this.row = new Text(row); this.column = new Text(column); this.timestamp = timestamp; @@ -338,5 +260,91 @@ row.readFields(in); column.readFields(in); timestamp = in.readLong(); + } + + // Statics + // TODO: Move these utility methods elsewhere (To a Column class?). + + /** + * Extracts the column family name from a column + * For example, returns 'info' if the specified column was 'info:server' + * @param col name of column + * @return column family as a TextSequence based on the passed + * <code>col</code>.
If <code>col</code> is reused, make a new Text of + * the result by calling {@link TextSequence#toText()}. + * @throws InvalidColumnNameException + */ + public static TextSequence extractFamily(final Text col) + throws InvalidColumnNameException { + return extractFamily(col, false); + } + + /** + * Extracts the column family name from a column + * For example, returns 'info' if the specified column was 'info:server' + * @param col name of column + * @return column family as a TextSequence based on the passed + * <code>col</code>. If <code>col</code> is reused, make a new Text of + * the result by calling {@link TextSequence#toText()}. + * @throws InvalidColumnNameException + */ + public static TextSequence extractFamily(final Text col, + final boolean withColon) + throws InvalidColumnNameException { + int offset = getColonOffset(col); + // Include ':' in copy? + offset += (withColon)? 1: 0; + if (offset == col.getLength()) { + return new TextSequence(col); + } + return new TextSequence(col, 0, offset); + } + + /** + * Extracts the column qualifier, the portion that follows the colon (':') + * family/qualifier separator. + * For example, returns 'server' if the specified column was 'info:server' + * @param col name of column + * @return column qualifier as a TextSequence based on the passed + * <code>col</code>. If <code>col</code> is reused, make a new Text of + * the result by calling {@link TextSequence#toText()}. + * @throws InvalidColumnNameException + */ + public static TextSequence extractQualifier(final Text col) + throws InvalidColumnNameException { + int offset = getColonOffset(col); + if (offset + 1 == col.getLength()) { + return null; + } + return new TextSequence(col, offset + 1); + } + + private static int getColonOffset(final Text col) + throws InvalidColumnNameException { + int offset = -1; + for (int i = 0; i < col.getLength(); i++) { + if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) { + offset = i; + break; + } + } + if(offset < 0) { + throw new InvalidColumnNameException(col + " is missing the colon " + + "family/qualifier separator"); + } + return offset; + } + + /** + * Returns row and column bytes out of an HStoreKey. + * @param hsk Store key. + * @return byte array encoding of HStoreKey + * @throws UnsupportedEncodingException + */ + public static byte[] getBytes(final HStoreKey hsk) + throws UnsupportedEncodingException { + StringBuilder s = new StringBuilder(hsk.getRow().toString()); + s.append(hsk.getColumn().toString()); + return s.toString().getBytes(HConstants.UTF8_ENCODING); } } Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java?rev=606332&r1=606331&r2=606332&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java Fri Dec 21 13:58:25 2007 @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.PrintStream; -import java.io.UnsupportedEncodingException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; @@ -472,25 +471,28 @@ } } + /* + * Format passed integer. + * This method takes some time and is done inline while uploading data.
For + * example, doing the mapfile test, generation of the key and value + * consumes about 30% of CPU time. + * @param i + * @return Integer as String zero padded. + */ static Text format(final int i) { return new Text(String.format("%010d", Integer.valueOf(i))); } /* + * This method takes some time and is done inline while uploading data. For + * example, doing the mapfile test, generation of the key and value + * consumes about 30% of CPU time. * @return Generated random value to insert into a table cell. */ static byte[] generateValue(final Random r) { - StringBuilder val = new StringBuilder(); - while(val.length() < ROW_LENGTH) { - val.append(Long.toString(r.nextLong())); - } - byte[] value = null; - try { - value = val.toString().getBytes(HConstants.UTF8_ENCODING); - } catch (UnsupportedEncodingException e) { - assert(false); - } - return value; + byte [] b = new byte [ROW_LENGTH]; + r.nextBytes(b); + return b; } static Text getRandomRow(final Random random, final int totalRows) { @@ -556,7 +558,7 @@ Random random = new Random(); Configuration c = new Configuration(); FileSystem fs = FileSystem.get(c); - Path mf = new Path("performanceevaluation.mapfile"); + Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile")); if (fs.exists(mf)) { fs.delete(mf); } @@ -571,7 +573,9 @@ } writer.close(); LOG.info("Writing " + ROW_COUNT + " records took " + - (System.currentTimeMillis() - startTime) + "ms"); + (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " + + "and values is done inline and has been seen to consume " + + "significant time: e.g. ~30% of cpu time)"); // Do random reads. LOG.info("Reading " + ROW_COUNT + " random rows"); MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c); @@ -585,7 +589,9 @@ } reader.close(); LOG.info("Reading " + ROW_COUNT + " random records took " + - (System.currentTimeMillis() - startTime) + "ms"); + (System.currentTimeMillis() - startTime) + "ms (Note: generation of " + + "random key is done inline and takes a significant amount of cpu " + + "time: e.g. 10-15%)"); // Do random reads. LOG.info("Reading " + ROW_COUNT + " rows sequentially"); reader = new MapFile.Reader(fs, mf.toString(), c); @@ -599,7 +605,7 @@ LOG.info("Reading " + ROW_COUNT + " records serially took " + (System.currentTimeMillis() - startTime) + "ms"); } - + private void runTest(final String cmd) throws IOException { if (cmd.equals(RANDOM_READ_MEM)) { // For this one test, so all fits in memory, make R smaller (See