Author: cutting
Date: Tue Mar 21 09:37:45 2006
New Revision: 387587

URL: http://svn.apache.org/viewcvs?rev=387587&view=rev
Log:
Fix for HADOOP-93. Convert min split size from int to long, and permit its specification in the config.

Modified:
    lucene/hadoop/trunk/conf/hadoop-default.xml
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java

Modified: lucene/hadoop/trunk/conf/hadoop-default.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?rev=387587&r1=387586&r2=387587&view=diff
==============================================================================
--- lucene/hadoop/trunk/conf/hadoop-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Tue Mar 21 09:37:45 2006
@@ -221,6 +221,14 @@
   be executed in parallel.</description>
 </property>

+<property>
+  <name>mapred.min.split.size</name>
+  <value>0</value>
+  <description>The minimum size chunk that map input should be split
+  into. Note that some file formats may have minimum split sizes that
+  take priority over this setting.</description>
+</property>
+
 <!-- ipc properties -->

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java?rev=387587&r1=387586&r2=387587&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java Tue Mar 21 09:37:45 2006
@@ -33,9 +33,9 @@

   private static final double SPLIT_SLOP = 0.1; // 10% slop

-  private int minSplitSize = 1;
+  private long minSplitSize = 1;

-  protected void setMinSplitSize(int minSplitSize) {
+  protected void setMinSplitSize(long minSplitSize) {
     this.minSplitSize = minSplitSize;
   }

@@ -112,8 +112,11 @@
         bytesPerSplit = fsBlockSize;
       }

-      if (bytesPerSplit < minSplitSize) { // no smaller than min size
-        bytesPerSplit = minSplitSize;
+      long configuredMinSplitSize = job.getLong("mapred.min.split.size", 0);
+      if( configuredMinSplitSize < minSplitSize )
+        configuredMinSplitSize = minSplitSize;
+      if (bytesPerSplit < configuredMinSplitSize) { // no smaller than min size
+        bytesPerSplit = configuredMinSplitSize;
       }

       long maxPerSplit = bytesPerSplit + (long)(bytesPerSplit*SPLIT_SLOP);
@@ -135,7 +138,9 @@
         if (bytesRemaining != 0) {
           splits.add(new FileSplit(file, length-bytesRemaining, bytesRemaining));
         }
+      //LOG.info( "Generating splits for " + i + "th file: " + file.getName() );
       }
+    //LOG.info( "Total # of splits: " + splits.size() );
     return (FileSplit[])splits.toArray(new FileSplit[splits.size()]);
   }