Author: stack
Date: Wed Sep 15 23:57:15 2010
New Revision: 997544

URL: http://svn.apache.org/viewvc?rev=997544&view=rev
Log:
HBASE-2899 hfile.min.blocksize.size ignored/documentation wrong

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
    hbase/trunk/src/main/resources/hbase-default.xml

Modified: hbase/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=997544&r1=997543&r2=997544&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Wed Sep 15 23:57:15 2010
@@ -522,6 +522,7 @@ Release 0.21.0 - Unreleased
                (Alex Newman via Todd Lipcon)
    HBASE-2986  multi writable can npe causing client hang
    HBASE-2979  Fix failing TestMultParrallel in hudson build
+   HBASE-2899  hfile.min.blocksize.size ignored/documentation wrong
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java?rev=997544&r1=997543&r2=997544&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java Wed Sep 15 23:57:15 2010
@@ -74,7 +74,14 @@ public class HColumnDescriptor implement
   public static final String COMPRESSION = "COMPRESSION";
   public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
   public static final String BLOCKCACHE = "BLOCKCACHE";
+  
+  /**
+   * Size of storefile/hfile 'blocks'.  Default is {@link #DEFAULT_BLOCKSIZE}.
+   * Use smaller block sizes for faster random-access at expense of larger
+   * indices (more memory consumption).
+   */
   public static final String BLOCKSIZE = "BLOCKSIZE";
+
   public static final String LENGTH = "LENGTH";
   public static final String TTL = "TTL";
   public static final String BLOOMFILTER = "BLOOMFILTER";
@@ -109,8 +116,7 @@ public class HColumnDescriptor implement
   public static final boolean DEFAULT_BLOCKCACHE = true;
 
   /**
-   * Default size of blocks in files store to the filesytem.  Use smaller for
-   * faster random-access at expense of larger indices (more memory consumption).
+   * Default size of blocks in files stored to the filesytem (hfiles).
    */
   public static final int DEFAULT_BLOCKSIZE = HFile.DEFAULT_BLOCKSIZE;
 
@@ -224,7 +230,9 @@ public class HColumnDescriptor implement
    * @param inMemory If true, column data should be kept in an HRegionServer's
    * cache
    * @param blockCacheEnabled If true, MapFile blocks should be cached
-   * @param blocksize
+   * @param blocksize Block size to use when writing out storefiles.  Use
+   * smaller blocksizes for faster random-access at expense of larger indices
+   * (more memory consumption).  Default is usually 64k.
    * @param timeToLive Time-to-live of cell contents, in seconds
    * (use HConstants.FOREVER for unlimited TTL)
    * @param bloomFilter Bloom filter type for this column
@@ -385,7 +393,7 @@ public class HColumnDescriptor implement
   }
 
   /**
-   * @return Blocksize.
+   * @return The storefile/hfile blocksize for this column family.
    */
   public synchronized int getBlocksize() {
     if (this.blocksize == null) {
@@ -397,7 +405,8 @@ public class HColumnDescriptor implement
   }
 
   /**
-   * @param s
+   * @param s Blocksize to use when writing out storefiles/hfiles on this
+   * column family.
    */
   public void setBlocksize(int s) {
     setValue(BLOCKSIZE, Integer.toString(s));

Modified: 
hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java?rev=997544&r1=997543&r2=997544&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java Wed Sep 15 23:57:15 2010
@@ -75,7 +75,8 @@ public class HFileOutputFormat extends F
     final FileSystem fs = outputdir.getFileSystem(conf);
     // These configs. are from hbase-*.xml
     final long maxsize = conf.getLong("hbase.hregion.max.filesize", 268435456);
-    final int blocksize = conf.getInt("hfile.min.blocksize.size", 65536);
+    final int blocksize =
+      conf.getInt("hbase.mapreduce.hfileoutputformat.blocksize", 65536);
     // Invented config.  Add to hbase-*.xml if other than default compression.
     final String compression = conf.get("hfile.compression",
       Compression.Algorithm.NONE.getName());

Modified: hbase/trunk/src/main/resources/hbase-default.xml
URL: 
http://svn.apache.org/viewvc/hbase/trunk/src/main/resources/hbase-default.xml?rev=997544&r1=997543&r2=997544&view=diff
==============================================================================
--- hbase/trunk/src/main/resources/hbase-default.xml (original)
+++ hbase/trunk/src/main/resources/hbase-default.xml Wed Sep 15 23:57:15 2010
@@ -404,11 +404,16 @@
     </description>
   </property>
   <property>
-    <name>hfile.min.blocksize.size</name>
+    <name>hbase.mapreduce.hfileoutputformat.blocksize</name>
     <value>65536</value>
-    <description>Minimum store file block size.  The smaller you make this, the
-    bigger your index and the less you fetch on a random-access.  Set size down
-    if you have small cells and want faster random-access of individual cells.
+    <description>The mapreduce HFileOutputFormat writes storefiles/hfiles.
+    This is the minimum hfile blocksize to emit.  Usually in hbase, writing
+    hfiles, the blocksize is gotten from the table schema (HColumnDescriptor)
+    but in the mapreduce outputformat context, we don't have access to the
+    schema so get blocksize from Configuation.  The smaller you make
+    the blocksize, the bigger your index and the less you fetch on a
+    random-access.  Set the blocksize down if you have small cells and want
+    faster random-access of individual cells.
     </description>
   </property>
   <property>


Reply via email to