Author: todd
Date: Tue Jun 7 22:08:15 2011
New Revision: 1133176

URL: http://svn.apache.org/viewvc?rev=1133176&view=rev
Log:
MAPREDUCE-2571. CombineFileInputFormat.getSplits throws a
java.lang.ArrayStoreException. Contributed by Bochun Bai.
Added:
    hadoop/mapreduce/branches/branch-0.22/src/test/mapred/org/apache/hadoop/mapred/TestCombineFileInputFormat.java
Modified:
    hadoop/mapreduce/branches/branch-0.22/CHANGES.txt
    hadoop/mapreduce/branches/branch-0.22/src/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java

Modified: hadoop/mapreduce/branches/branch-0.22/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.22/CHANGES.txt?rev=1133176&r1=1133175&r2=1133176&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.22/CHANGES.txt (original)
+++ hadoop/mapreduce/branches/branch-0.22/CHANGES.txt Tue Jun 7 22:08:15 2011
@@ -581,6 +581,9 @@ Release 0.22.0 - Unreleased
     MAPREDUCE-2487. ChainReducer uses MAPPER_BY_VALUE instead of
     REDUCER_BY_VALUE. (Devaraj K via todd)
 
+    MAPREDUCE-2571. CombineFileInputFormat.getSplits throws a
+    java.lang.ArrayStoreException. (Bochun Bai via todd)
+
 Release 0.21.1 - Unreleased
 
   NEW FEATURES

Modified: hadoop/mapreduce/branches/branch-0.22/src/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.22/src/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java?rev=1133176&r1=1133175&r2=1133176&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.22/src/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java (original)
+++ hadoop/mapreduce/branches/branch-0.22/src/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java Tue Jun 7 22:08:15 2011
@@ -68,7 +68,17 @@ public abstract class CombineFileInputFo
   public InputSplit[] getSplits(JobConf job, int numSplits) 
     throws IOException {
-    return super.getSplits(new Job(job)).toArray(new InputSplit[0]);
+    List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits =
+      super.getSplits(new Job(job));
+    InputSplit[] ret = new InputSplit[newStyleSplits.size()];
+    for(int pos = 0; pos < newStyleSplits.size(); ++pos) {
+      org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit =
+        (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
+      ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(),
+          newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(),
+          newStyleSplit.getLocations());
+    }
+    return ret;
   }
 
   /**

Added: hadoop/mapreduce/branches/branch-0.22/src/test/mapred/org/apache/hadoop/mapred/TestCombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.22/src/test/mapred/org/apache/hadoop/mapred/TestCombineFileInputFormat.java?rev=1133176&view=auto
==============================================================================
--- hadoop/mapreduce/branches/branch-0.22/src/test/mapred/org/apache/hadoop/mapred/TestCombineFileInputFormat.java (added)
+++ hadoop/mapreduce/branches/branch-0.22/src/test/mapred/org/apache/hadoop/mapred/TestCombineFileInputFormat.java Tue Jun 7 22:08:15 2011
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
+import org.apache.hadoop.mapred.lib.CombineFileSplit;
+import org.apache.hadoop.mapred.lib.CombineFileRecordReader;
+
+import org.junit.Test;
+import static junit.framework.Assert.*;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class TestCombineFileInputFormat {
+  private static final Log LOG =
+    LogFactory.getLog(TestCombineFileInputFormat.class.getName());
+
+  private static JobConf defaultConf = new JobConf();
+  private static FileSystem localFs = null;
+  static {
+    try {
+      defaultConf.set("fs.default.name", "file:///");
+      localFs = FileSystem.getLocal(defaultConf);
+    } catch (IOException e) {
+      throw new RuntimeException("init failure", e);
+    }
+  }
+  private static Path workDir =
+    new Path(new Path(System.getProperty("test.build.data", "/tmp")),
+             "TestCombineFileInputFormat").makeQualified(localFs);
+
+  private static void writeFile(FileSystem fs, Path name,
+                                String contents) throws IOException {
+    OutputStream stm;
+    stm = fs.create(name);
+    stm.write(contents.getBytes());
+    stm.close();
+  }
+
+  /**
+   * Test getSplits
+   */
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testSplits() throws IOException {
+    JobConf job = new JobConf(defaultConf);
+    localFs.delete(workDir, true);
+    writeFile(localFs, new Path(workDir, "test.txt"),
+              "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
+    FileInputFormat.setInputPaths(job, workDir);
+    CombineFileInputFormat format = new CombineFileInputFormat() {
+      @Override
+      public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
+        return new CombineFileRecordReader(job, (CombineFileSplit)split, reporter, CombineFileRecordReader.class);
+      }
+    };
+    final int SIZE_SPLITS = 1;
+    LOG.info("Trying to getSplits with splits = " + SIZE_SPLITS);
+    InputSplit[] splits = format.getSplits(job, SIZE_SPLITS);
+    LOG.info("Got getSplits = " + splits.length);
+    assertEquals("splits == " + SIZE_SPLITS, SIZE_SPLITS, splits.length);
+  }
+}
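
Note (not part of the committed change): the list returned by the new-API super.getSplits() holds org.apache.hadoop.mapreduce.lib.input.CombineFileSplit objects, which do not implement the old-API org.apache.hadoop.mapred.InputSplit interface, so copying them into an InputSplit[] with toArray(new InputSplit[0]) trips the JVM's runtime array-store check; the patch instead rebuilds each split as an old-API CombineFileSplit. The standalone sketch below reproduces the same ArrayStoreException using hypothetical stand-in types (OldApiSplit, NewApiSplit) rather than the real Hadoop classes.

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative only: OldApiSplit and NewApiSplit are hypothetical stand-ins
    // for org.apache.hadoop.mapred.InputSplit and
    // org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, which share no
    // common supertype.
    public class ArrayStoreSketch {
      interface OldApiSplit { }
      static class NewApiSplit { }

      public static void main(String[] args) {
        List<NewApiSplit> newStyleSplits = new ArrayList<NewApiSplit>();
        newStyleSplits.add(new NewApiSplit());

        try {
          // Same shape as the removed line:
          //   return super.getSplits(new Job(job)).toArray(new InputSplit[0]);
          // toArray(T[]) copies each element into an OldApiSplit[]; storing a
          // NewApiSplit there fails the array-store check at runtime.
          OldApiSplit[] oldStyle = newStyleSplits.toArray(new OldApiSplit[0]);
        } catch (ArrayStoreException e) {
          System.out.println("ArrayStoreException, as reported in MAPREDUCE-2571: " + e);
        }
      }
    }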