Author: srowen
Date: Tue Feb 22 21:17:35 2011
New Revision: 1073511

URL: http://svn.apache.org/viewvc?rev=1073511&view=rev
Log:
MAHOUT-614 fix up overriding of Hadoop's FileOutputFormat

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java?rev=1073511&r1=1073510&r2=1073511&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java Tue Feb 22 21:17:35 2011
@@ -22,12 +22,10 @@ import java.io.IOException;
 import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Progressable;
 
 /**
  * This abstract class extends the FileOutputFormat, allowing to write the
@@ -52,23 +50,10 @@ public abstract class MultipleOutputForm
   /**
    * Create a composite record writer that can write key/value data to different
    * output files
-   * 
-   * @param fs
-   *          the file system to use
-   * @param job
-   *          the job conf for the job
-   * @param name
-   *          the leaf file name for the output file (such as part-00000")
-   * @param arg3
-   *          a progressable for reporting progress.
    * @return a composite record writer
    */
-  public RecordWriter<K, V> getRecordWriter(FileSystem fs, Configuration job, String name, Progressable arg3) {
-
-    final FileSystem myFS = fs;
-    final String myName = generateLeafFileName(name);
-    final Configuration myJob = job;
-    final Progressable myProgressable = arg3;
+  @Override
+  public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
 
     return new RecordWriter<K, V>() {
 
@@ -76,13 +61,13 @@ public abstract class MultipleOutputForm
       private final TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();
 
       @Override
-      public void write(K key, V value) throws IOException {
+      public void write(K key, V value) throws IOException, InterruptedException {
 
         // get the file name based on the key
-        String keyBasedPath = generateFileNameForKeyValue(key, value, myName);
+        String keyBasedPath = generateFileNameForKeyValue(key, value, generateLeafFileName(null));
 
         // get the file name based on the input file name
-        String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);
+        String finalPath = getInputFileBasedOutputFileName(context.getConfiguration(), keyBasedPath);
 
         // get the actual key
         K actualKey = generateActualKey(key, value);
@@ -93,7 +78,7 @@ public abstract class MultipleOutputForm
           // if we don't have the record writer yet for the final path, create
           // one
           // and add it to the cache
-          rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
+          rw = getBaseRecordWriter(context.getConfiguration());
           this.recordWriters.put(finalPath, rw);
         }
         try {
@@ -177,19 +162,19 @@ public abstract class MultipleOutputForm
    * N trailing legs of the input file name where N is the config value for
    * "num.of.trailing.legs.to.use".
    * 
-   * @param job
+   * @param conf
    *          the job config
    * @param name
    *          the output file name
    * @return the outfile name based on a given anme and the input file name.
    */
-  protected String getInputFileBasedOutputFileName(Configuration job, String name) {
-    String infilepath = job.get("map.input.file");
+  protected String getInputFileBasedOutputFileName(Configuration conf, String name) {
+    String infilepath = conf.get("map.input.file");
     if (infilepath == null) {
       // if the map input file does not exists, then return the given name
       return name;
     }
-    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
+    int numOfTrailingLegsToUse = conf.getInt("mapred.outputformat.numOfTrailingLegs", 0);
     if (numOfTrailingLegsToUse <= 0) {
       return name;
     }
@@ -212,21 +197,8 @@ public abstract class MultipleOutputForm
   }
 
   /**
-   * 
-   * @param fs
-   *          the file system to use
-   * @param job
-   *          a job conf object
-   * @param name
-   *          the name of the file over which a record writer object will be
-   *          constructed
-   * @param arg3
-   *          a progressable object
    * @return A RecordWriter object over the given file
-   * @throws IOException
    */
-  protected abstract RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
-                                                            Configuration job,
-                                                            String name,
-                                                            Progressable arg3) throws IOException;
+  protected abstract RecordWriter<K, V> getBaseRecordWriter(Configuration conf)
+    throws IOException, InterruptedException;
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java?rev=1073511&r1=1073510&r2=1073511&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java Tue Feb 22 21:17:35 2011
@@ -20,12 +20,10 @@ package org.apache.mahout.classifier.bay
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Progressable;
 
 /**
  * This class extends the MultipleOutputFormat, allowing to write the output
@@ -36,24 +34,11 @@ public class MultipleTextOutputFormat<K,
   private TextOutputFormat<K, V> theTextOutputFormat;
 
   @Override
-  protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, Configuration conf, String name, Progressable arg3)
-      throws IOException {
+  protected RecordWriter<K, V> getBaseRecordWriter(Configuration conf) throws IOException, InterruptedException {
     if (theTextOutputFormat == null) {
       theTextOutputFormat = new TextOutputFormat<K, V>();
     }
-    try {
-      return theTextOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID()));
-    } catch (InterruptedException e) {
-      // continue
-    }
-    return null;
+    return theTextOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID()));
   }
 
-  @Override
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
-    if (theTextOutputFormat == null) {
-      theTextOutputFormat = new TextOutputFormat<K, V>();
-    }
-    return theTextOutputFormat.getRecordWriter(job);
-  }
 }


Reply via email to