Author: cutting
Date: Tue Mar 21 09:37:45 2006
New Revision: 387587

URL: http://svn.apache.org/viewcvs?rev=387587&view=rev
Log:
Fix for HADOOP-93.  Convert min split size from int to long, and permit its 
specification in the config.

Modified:
    lucene/hadoop/trunk/conf/hadoop-default.xml
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java

Modified: lucene/hadoop/trunk/conf/hadoop-default.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?rev=387587&r1=387586&r2=387587&view=diff
==============================================================================
--- lucene/hadoop/trunk/conf/hadoop-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Tue Mar 21 09:37:45 2006
@@ -221,6 +221,14 @@
   be executed in parallel.</description>
 </property>
 
+<property>
+  <name>mapred.min.split.size</name>
+  <value>0</value>
+  <description>The minimum size chunk that map input should be split
+  into.  Note that some file formats may have minimum split sizes that
+  take priority over this setting.</description>
+</property>
+
 
 <!-- ipc properties -->
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java?rev=387587&r1=387586&r2=387587&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java Tue Mar 21 09:37:45 2006
@@ -33,9 +33,9 @@
 
   private static final double SPLIT_SLOP = 0.1;   // 10% slop
 
-  private int minSplitSize = 1;
+  private long minSplitSize = 1;
 
-  protected void setMinSplitSize(int minSplitSize) {
+  protected void setMinSplitSize(long minSplitSize) {
     this.minSplitSize = minSplitSize;
   }
 
@@ -112,8 +112,11 @@
       bytesPerSplit = fsBlockSize;
     }
 
-    if (bytesPerSplit < minSplitSize) {           // no smaller than min size
-      bytesPerSplit = minSplitSize;
+    long configuredMinSplitSize = job.getLong("mapred.min.split.size", 0);
+    if( configuredMinSplitSize < minSplitSize )
+       configuredMinSplitSize = minSplitSize;
+    if (bytesPerSplit < configuredMinSplitSize) { // no smaller than min size
+      bytesPerSplit = configuredMinSplitSize;
     }
 
     long maxPerSplit = bytesPerSplit + (long)(bytesPerSplit*SPLIT_SLOP);
@@ -135,7 +138,9 @@
       if (bytesRemaining != 0) {
         splits.add(new FileSplit(file, length-bytesRemaining, bytesRemaining));
       }
+      //LOG.info( "Generating splits for " + i + "th file: " + file.getName() );
     }
+    //LOG.info( "Total # of splits: " + splits.size() );
     return (FileSplit[])splits.toArray(new FileSplit[splits.size()]);
   }
 


Reply via email to