Author: jlowe
Date: Mon Nov 11 19:22:38 2013
New Revision: 1540813

URL: http://svn.apache.org/r1540813
Log:
MAPREDUCE-5186. mapreduce.job.max.split.locations causes some splits created by CombineFileInputFormat to fail. Contributed by Robert Parker and Jason Lowe

Added: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java (with props) Removed: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestBlockLimits.java Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1540813&r1=1540812&r2=1540813&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Mon Nov 11 19:22:38 2013 @@ -197,6 +197,10 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-5585. TestCopyCommitter#testNoCommitAction Fails on JDK7 (jeagles) + MAPREDUCE-5186. 
mapreduce.job.max.split.locations causes some splits + created by CombineFileInputFormat to fail (Robert Parker and Jason Lowe + via jlowe) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java?rev=1540813&r1=1540812&r2=1540813&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java Mon Nov 11 19:22:38 2013 @@ -20,6 +20,7 @@ package org.apache.hadoop.mapreduce.spli import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -39,6 +40,9 @@ import org.apache.hadoop.mapreduce.split import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + /** * The class that is used by the Job clients to write splits (both the meta * and the raw bytes parts) @@ -47,6 +51,7 @@ import org.apache.hadoop.classification. 
@InterfaceStability.Unstable public class JobSplitWriter { + private static final Log LOG = LogFactory.getLog(JobSplitWriter.class); private static final int splitVersion = JobSplit.META_SPLIT_VERSION; private static final byte[] SPLIT_FILE_HEADER; @@ -129,9 +134,10 @@ public class JobSplitWriter { long currCount = out.getPos(); String[] locations = split.getLocations(); if (locations.length > maxBlockLocations) { - throw new IOException("Max block location exceeded for split: " + LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + maxBlockLocations); + locations = Arrays.copyOf(locations, maxBlockLocations); } info[i++] = new JobSplit.SplitMetaInfo( @@ -159,9 +165,10 @@ public class JobSplitWriter { long currLen = out.getPos(); String[] locations = split.getLocations(); if (locations.length > maxBlockLocations) { - throw new IOException("Max block location exceeded for split: " + LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + maxBlockLocations); + locations = Arrays.copyOf(locations,maxBlockLocations); } info[i++] = new JobSplit.SplitMetaInfo( locations, offset, Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1540813&r1=1540812&r2=1540813&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (original) +++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Mon Nov 11 19:22:38 2013 @@ -83,6 +83,14 @@ </property> 
<property> + <name>mapreduce.job.max.split.locations</name> + <value>10</value> + <description>The max number of block locations to store for each split for + locality calculation. + </description> +</property> + +<property> <name>mapreduce.job.split.metainfo.maxsize</name> <value>10000000</value> <description>The maximum permissible size of the split metainfo file. Added: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java?rev=1540813&view=auto ============================================================================== --- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java (added) +++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java Mon Nov 11 19:22:38 2013 @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.split; + +import static org.junit.Assert.assertEquals; + +import java.io.File; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.MRConfig; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.junit.Test; + +public class TestJobSplitWriter { + + private static final File TEST_DIR = new File( + System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), "TestJobSplitWriter"); + + @Test + public void testMaxBlockLocationsNewSplits() throws Exception { + TEST_DIR.mkdirs(); + try { + Configuration conf = new Configuration(); + conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4); + Path submitDir = new Path(TEST_DIR.getAbsolutePath()); + FileSystem fs = FileSystem.getLocal(conf); + FileSplit split = new FileSplit(new Path("/some/path"), 0, 1, + new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" }); + JobSplitWriter.createSplitFiles(submitDir, conf, fs, + new FileSplit[] { split }); + JobSplit.TaskSplitMetaInfo[] infos = + SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf, + submitDir); + assertEquals("unexpected number of splits", 1, infos.length); + assertEquals("unexpected number of split locations", + 4, infos[0].getLocations().length); + } finally { + FileUtil.fullyDelete(TEST_DIR); + } + } + + @Test + public void testMaxBlockLocationsOldSplits() throws Exception { + TEST_DIR.mkdirs(); + try { + Configuration conf = new Configuration(); + conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4); + Path submitDir = new Path(TEST_DIR.getAbsolutePath()); + FileSystem fs = FileSystem.getLocal(conf); + org.apache.hadoop.mapred.FileSplit split = + new org.apache.hadoop.mapred.FileSplit(new 
Path("/some/path"), 0, 1, + new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" }); + JobSplitWriter.createSplitFiles(submitDir, conf, fs, + new org.apache.hadoop.mapred.InputSplit[] { split }); + JobSplit.TaskSplitMetaInfo[] infos = + SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf, + submitDir); + assertEquals("unexpected number of splits", 1, infos.length); + assertEquals("unexpected number of split locations", + 4, infos[0].getLocations().length); + } finally { + FileUtil.fullyDelete(TEST_DIR); + } + } +} Propchange: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java ------------------------------------------------------------------------------ svn:eol-style = native